From ad0a729e047b3cb2f417067e48df710f733803a3 Mon Sep 17 00:00:00 2001 From: Robert Pengelly Date: Mon, 3 Jun 2024 04:35:05 +0100 Subject: [PATCH 1/1] New server --- LICENSE | 24 + Makefile.p32 | 25 + Makefile.pdw | 25 + Makefile.unix | 28 + Makefile.w32 | 21 + README.md | 71 + as.c | 186 ++ as.h | 45 + bin.c | 99 ++ cstr.c | 69 + cstr.h | 20 + eval.c | 620 +++++++ eval.h | 9 + expr.c | 1395 +++++++++++++++ expr.h | 87 + fixup.c | 627 +++++++ fixup.h | 31 + frag.c | 197 +++ frag.h | 54 + hashtab.c | 215 +++ hashtab.h | 36 + intel.c | 4515 +++++++++++++++++++++++++++++++++++++++++++++++++ intel.h | 125 ++ kwd.c | 634 +++++++ kwd.h | 19 + lex.c | 35 + lex.h | 18 + lib.c | 542 ++++++ lib.h | 40 + list.c | 39 + list.h | 16 + listing.c | 304 ++++ listing.h | 15 + ll.c | 316 ++++ ll.h | 13 + macro.c | 555 ++++++ macro.h | 34 + obj.c | 302 ++++ obj.h | 46 + process.c | 1494 ++++++++++++++++ process.h | 10 + report.c | 181 ++ report.h | 27 + section.c | 139 ++ section.h | 42 + symbol.c | 788 +++++++++ symbol.h | 69 + vector.c | 54 + vector.h | 19 + 49 files changed, 14275 insertions(+) create mode 100644 LICENSE create mode 100644 Makefile.p32 create mode 100644 Makefile.pdw create mode 100644 Makefile.unix create mode 100644 Makefile.w32 create mode 100644 README.md create mode 100644 as.c create mode 100644 as.h create mode 100644 bin.c create mode 100644 cstr.c create mode 100644 cstr.h create mode 100644 eval.c create mode 100644 eval.h create mode 100644 expr.c create mode 100644 expr.h create mode 100644 fixup.c create mode 100644 fixup.h create mode 100644 frag.c create mode 100644 frag.h create mode 100644 hashtab.c create mode 100644 hashtab.h create mode 100644 intel.c create mode 100644 intel.h create mode 100644 kwd.c create mode 100644 kwd.h create mode 100644 lex.c create mode 100644 lex.h create mode 100644 lib.c create mode 100644 lib.h create mode 100644 list.c create mode 100644 list.h create mode 100644 listing.c create mode 100644 listing.h create mode 
100644 ll.c create mode 100644 ll.h create mode 100644 macro.c create mode 100644 macro.h create mode 100644 obj.c create mode 100644 obj.h create mode 100644 process.c create mode 100644 process.h create mode 100644 report.c create mode 100644 report.h create mode 100644 section.c create mode 100644 section.h create mode 100644 symbol.c create mode 100644 symbol.h create mode 100644 vector.c create mode 100644 vector.h diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fdddb29 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
+ +For more information, please refer to <https://unlicense.org> diff --git a/Makefile.p32 b/Makefile.p32 new file mode 100644 index 0000000..8e6b41e --- /dev/null +++ b/Makefile.p32 @@ -0,0 +1,25 @@ +#****************************************************************************** +# @file Makefile.p32 +#****************************************************************************** +AS=as386 +CC=gcc386 +LD=ld386 + +COPTS=-S -O2 -fno-common -ansi -I. -I./include -I../pdos/pdpclib -I../pdos/src -D__PDOS386__ -D__32BIT__ -D__NOBIVA__ -D__PDOS__ -Wall -Werror -ansi -m32 -pedantic +COBJ=as.o bin.o cstr.o eval.o expr.o fixup.o frag.o hashtab.o intel.o kwd.o lex.o lib.o list.o listing.o ll.o macro.o obj.o process.o report.o section.o symbol.o vector.o + +all: clean sasm.exe + +sasm.exe: $(COBJ) + $(LD) -s -o sasm.exe ../pdos/pdpclib/pdosst32.o $(COBJ) ../pdos/pdpclib/pdos.a + +.c.o: + $(CC) $(COPTS) -o $*.s $< + $(AS) -o $@ $*.s + rm -f $*.s + +clean: + for %f in ($(COBJ)) do ( rm -f %f ) + + rm -f sasm + rm -f sasm.exe diff --git a/Makefile.pdw b/Makefile.pdw new file mode 100644 index 0000000..73915de --- /dev/null +++ b/Makefile.pdw @@ -0,0 +1,25 @@ +#****************************************************************************** +# @file Makefile.pdw +#****************************************************************************** +AS=aswin +CC=gccwin +LD=ldwin + +COPTS=-S -O2 -fno-common -ansi -I. 
-I./include -I../pdos/pdpclib -I../pdos/src -D__WIN32__ -D__NOBIVA__ -D__PDOS__ -Wall -Werror -ansi -m32 -pedantic +COBJ=as.o bin.o cstr.o eval.o expr.o fixup.o frag.o hashtab.o intel.o kwd.o lex.o lib.o list.o listing.o ll.o macro.o obj.o process.o report.o section.o symbol.o vector.o + +all: clean sasm.exe + +sasm.exe: $(COBJ) + $(LD) -s -o sasm.exe ../pdos/pdpclib/w32start.o $(COBJ) ../pdos/pdpclib/msvcrt.a ../pdos/src/kernel32.a + +.c.o: + $(CC) $(COPTS) -o $*.s $< + $(AS) -o $@ $*.s + rm -f $*.s + +clean: + for %f in ($(COBJ)) do ( rm -f %f ) + + rm -f sasm + rm -f sasm.exe diff --git a/Makefile.unix b/Makefile.unix new file mode 100644 index 0000000..44941e1 --- /dev/null +++ b/Makefile.unix @@ -0,0 +1,28 @@ +#****************************************************************************** +# @file Makefile.unix +#****************************************************************************** +OBJDIR ?= $(CURDIR) +SRCDIR ?= $(CURDIR) + +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -I$(OBJDIR) -I$(SRCDIR)/include -O2 -Wall -Werror -Wextra -ansi -pedantic -std=c90 + +CSRC := as.c bin.c cstr.c eval.c expr.c fixup.c frag.c hashtab.c intel.c kwd.c lex.c lib.c list.c listing.c ll.c macro.c obj.c process.c report.c section.c symbol.c vector.c + +ifeq ($(OS), Windows_NT) +all: sasm.exe + +sasm.exe: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ +else +all: sasm + +sasm: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ +endif + +clean: + if [ -f sasm ]; then rm -rf sasm; fi + if [ -f sasm.exe ]; then rm -rf sasm.exe; fi diff --git a/Makefile.w32 b/Makefile.w32 new file mode 100644 index 0000000..c70d0cf --- /dev/null +++ b/Makefile.w32 @@ -0,0 +1,21 @@ +#****************************************************************************** +# @file Makefile.w32 +#****************************************************************************** +OBJDIR ?= $(CURDIR) +SRCDIR ?= $(CURDIR) + +VPATH := $(SRCDIR) + +CC := gcc +CFLAGS := -D_FILE_OFFSET_BITS=64 -I$(OBJDIR) -I$(SRCDIR)/include 
-O2 -Wall -Werror -Wextra -ansi -pedantic -std=c90 + +CSRC := as.c bin.c cstr.c eval.c expr.c fixup.c frag.c hashtab.c intel.c kwd.c lex.c lib.c list.c listing.c ll.c macro.c obj.c process.c report.c section.c symbol.c vector.c + +all: sasm.exe + +clean: + if exist sasm ( del /q sasm ) + if exist sasm.exe ( del /q sasm.exe ) + +sasm.exe: $(CSRC) + $(CC) $(CFLAGS) -o $@ $^ diff --git a/README.md b/README.md new file mode 100644 index 0000000..fd9d8f8 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +## What is sasm? + + Small Assembler (SASM) is a very small assembler for the i80x + line of processors. + +## License + + All source code is Public Domain. + +## Obtain the source code + + git clone https://git.candlhat.org/sasm.git + +## Building + + BSD: + + Make sure you have gcc and gmake installed then run gmake -f Makefile.unix. + + Linux: + + Make sure you have gcc and make installed then run make -f Makefile.unix. + + macOS: + + Make sure you have xcode command line tools installed then run + make -f Makefile.unix. + + Windows: + + Make sure you have mingw installed and the location within your PATH variable + then run mingw32-make.exe -f Makefile.w32. + +## Usage + + Example (comments and preprocessor directives): + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; This is a comment. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + %define SOMETHING 5 + + %if SOMETHING == 5 + ... + %endif + + Code Directives: + + .8086 - Only allow 8086 instructions. + .186 - Allow 186 and below instructions. + .286/.286p - Allow 286 and below instructions. + .386/.386p - Allow 386 and below instructions. + .486/.486p - Allow 486 and below instructions. + .586 - Allow 586 and below instructions. + .686 - Allow 686 and below instructions. + + .code/text - Output bytes to the code/text section/segment. + .data - Output bytes to the data section/segment. + .bss - Output bytes to the bss section/segment. 
+ + align - Align the code to a specific boundary. + extern/extrn - Define a symbol that's in a different file. + global/public - Make the specified symbol available + + db - Output a byte to the object file. + dw - Output a word to the object file. + dd - Output a dword to the object file. + + If you want to pad the file, you need the following: + + db [padding amount] dup [byte to pad with] diff --git a/as.c b/as.c new file mode 100644 index 0000000..3c15e73 --- /dev/null +++ b/as.c @@ -0,0 +1,186 @@ +/****************************************************************************** + * @file as.c + *****************************************************************************/ +#include +#include + +#include "as.h" +#include "lex.h" +#include "lib.h" +#include "listing.h" +#include "process.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +struct as_state *state = 0; +const char *program_name = 0; + +extern void output_binary (FILE *fp); +extern void output_obj (FILE *fp); + +extern void keywords_init (void); +extern void sections_init (void); + +static void cleanup (void) { + + if (state->ofp) { fclose (state->ofp); } + + if (get_error_count () > 0) { + + if (state->ofile) { + remove (state->ofile); + } + + if (state->lfile) { + remove (state->lfile); + } + + } + +} + +extern void fixup_code (void); +extern void machine_dependent_init (void); + +int main (int argc, char **argv) { + + struct symbol *symbol; + char *p, *root; + + if (argc && *argv) { + + program_name = *argv; + + if ((p = strrchr (program_name, '/')) || (p = strrchr (program_name, '\\'))) { + program_name = (p + 1); + } + + } + + atexit (cleanup); + lex_init (); + + state = xmalloc (sizeof (*state)); + parse_args (argc, argv, 1); + + if (!state->ifile) { + + report_at (program_name, 0, REPORT_ERROR, "no input file specified"); + return EXIT_FAILURE; + + } + + if (state->ifile && strcmp (state->ifile, "-")) { + + if ((p = strrchr (state->ifile, '/')) || (p = strrchr (state->ifile, 
'\\'))) { + + unsigned int len = p - state->ifile; + + root = xmalloc (len + 2); + sprintf (root, "%.*s/", (int) len, state->ifile); + + add_include_path (root); + free (root); + + } + + } + + machine_dependent_init (); + + keywords_init (); + sections_init (); + + if (preprocess_init ()) { + return EXIT_FAILURE; + } + + process_file (state->ifile); + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + fixup_code (); + + if (state->lfile) { + generate_listing (); + } + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_external (symbol) && symbol_get_section (symbol) == undefined_section) { + + if (symbol->scope == SYMBOL_SCOPE_GLOBAL) { + report_at (program_name, 0, REPORT_ERROR, "undefined global symbol '%s'", symbol->name); + } + + } + + } + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + if (state->format == AS_OUTPUT_BIN) { + + int report_output = 1; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if ((symbol_is_external (symbol) && symbol_get_section (symbol) == undefined_section) || symbol_is_undefined (symbol)) { + + if (symbol->scope == SYMBOL_SCOPE_GLOBAL) { + + report_at (program_name, 0, REPORT_ERROR, "undefined global symbol '%s'", symbol->name); + continue; + + } + + if (report_output) { + + report_at (program_name, 0, REPORT_ERROR, "%s output does not support external references", state->format == AS_OUTPUT_BIN ? 
"binary" : "com"); + report_output = 0; + + } + + report_at (program_name, 0, REPORT_ERROR, "undefined external symbol '%s'", symbol->name); + + } + + } + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + if (!(state->ofp = fopen (state->ofile, "wb"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", state->ofile); + return EXIT_FAILURE; + + } + + output_binary (state->ofp); + return EXIT_SUCCESS; + + } + + if (!(state->ofp = fopen (state->ofile, "wb"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", state->ofile); + return EXIT_FAILURE; + + } + + output_obj (state->ofp); + + if (get_error_count () > 0) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; + +} diff --git a/as.h b/as.h new file mode 100644 index 0000000..d33151c --- /dev/null +++ b/as.h @@ -0,0 +1,45 @@ +/****************************************************************************** + * @file as.h + *****************************************************************************/ +#ifndef _AS_H +#define _AS_H + +#include + +#include "list.h" +#include "symbol.h" +#include "vector.h" + +struct proc { + + struct vector regs, args; + char *name; + + char *filename; + unsigned long line_number; + +}; + +#define AS_OUTPUT_OBJ 0x00 +#define AS_OUTPUT_BIN 0x01 + +struct as_state { + + const char *ifile, *ofile, *lfile; + FILE *ofp; + + struct list *pplist; + int model, data_size; + + struct vector procs; + char *ext; + + struct symbol *end_symbol; + int format; + +}; + +extern struct as_state *state; +extern const char *program_name; + +#endif /* _AS_H */ diff --git a/bin.c b/bin.c new file mode 100644 index 0000000..a22eaa3 --- /dev/null +++ b/bin.c @@ -0,0 +1,99 @@ +/****************************************************************************** + * @file bin.c + *****************************************************************************/ +#include "limits.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" + 
+#include "as.h" +#include "fixup.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +static unsigned long text_size = 0, data_size = 0, bss_size = 0; +static void *output = 0; + +void output_binary (FILE *fp) { + + unsigned long i = 0; + struct frag *frag; + + section_set (text_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + text_size += frag->fixed_size; + + } + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + data_size += frag->fixed_size; + + } + + section_set (bss_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + bss_size += frag->fixed_size; + + } + + output = xmalloc (text_size + data_size); + section_set (text_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + memcpy ((unsigned char *) output + i, frag->buf, frag->fixed_size); + i += frag->fixed_size; + + } + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + if (frag->fixed_size == 0) { + continue; + } + + memcpy ((unsigned char *) output + i, frag->buf, frag->fixed_size); + i += frag->fixed_size; + + } + + if (get_error_count () > 0) { + exit (EXIT_FAILURE); + } + + if (fwrite (output, text_size + data_size, 1, fp) != 1) { + + report_at (program_name, 0, REPORT_ERROR, "failed to write data to '%s'", state->ofile); + exit (EXIT_FAILURE); + + } + +} diff --git a/cstr.c b/cstr.c new file mode 100644 index 0000000..d518f2e --- /dev/null +++ b/cstr.c @@ -0,0 +1,69 @@ +/****************************************************************************** + * @file cstr.c + 
*****************************************************************************/ +#include +#include + +#include "cstr.h" + +extern void *xrealloc (void *__ptr, unsigned int __size); + +static void cstr_realloc (struct cstring *cstr, int new_size) { + + int size = cstr->size_allocated; + + if (size < 8) { + size = 8; + } + + while (size < new_size) { + size *= 2; + } + + cstr->data = xrealloc (cstr->data, size); + cstr->size_allocated = size; + +} + +void cstr_ccat (struct cstring *cstr, int ch) { + + int size = cstr->size + 1; + + if (size > cstr->size_allocated) { + cstr_realloc (cstr, size); + } + + ((unsigned char *) cstr->data)[size - 1] = ch; + cstr->size = size; + +} + +void cstr_cat (struct cstring *cstr, const char *str, int len) { + + int size; + + if (len <= 0) { + len = strlen (str) + 1 + len; + } + + size = cstr->size + len; + + if (size > cstr->size_allocated) { + cstr_realloc (cstr, size); + } + + memmove (((unsigned char *) cstr->data) + cstr->size, str, len); + cstr->size = size; + +} + +void cstr_new (struct cstring *cstr) { + memset (cstr, 0, sizeof (struct cstring)); +} + +void cstr_free (struct cstring *cstr) { + + free (cstr->data); + cstr_new (cstr); + +} diff --git a/cstr.h b/cstr.h new file mode 100644 index 0000000..2736720 --- /dev/null +++ b/cstr.h @@ -0,0 +1,20 @@ +/****************************************************************************** + * @file cstr.h + *****************************************************************************/ +#ifndef _CSTR_H +#define _CSTR_H + +struct cstring { + + int size, size_allocated; + void *data; + +}; + +void cstr_ccat (struct cstring *cstr, int ch); +void cstr_cat (struct cstring *cstr, const char *str, int len); + +void cstr_new (struct cstring *cstr); +void cstr_free (struct cstring *cstr); + +#endif /* _CSTR_H */ diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..f0fd20e --- /dev/null +++ b/eval.c @@ -0,0 +1,620 @@ 
+/****************************************************************************** + * @file eval.c + *****************************************************************************/ +#include +#include +#include +#include +#include + +#include "as.h" +#include "eval.h" +#include "lex.h" +#include "lib.h" +#include "macro.h" +#include "report.h" + +static unsigned int eval_expr (unsigned int lhs, char *start, char **pp, int outer_prec); + +static unsigned int eval_unary (unsigned int lhs, char *start, char **pp) { + + *pp = skip_whitespace (*pp); + + if (isdigit ((int) **pp)) { + + unsigned int temp, temp2; + int ch; + + if ((*pp)[0] == '0' && tolower ((int) (*pp)[1]) == 'x') { + + unsigned int base = 16; + *pp += 2; + + while (isxdigit ((int) **pp)) { + + temp = lhs * base; + ch = *((*pp)++); + + if (ch >= '0' && ch <= '9') { + temp2 = ch - '0'; + } else { + temp2 = (ch & 0xdf) - ('A' - 10); + } + + lhs = temp + temp2; + + } + + } else if ((*pp)[0] == '0') { + + unsigned int base = 8; + + while (isdigit ((int) **pp)) { + + temp = lhs * base; + lhs = (temp + (*((*pp)++) - '0')); + + } + + } else { + + unsigned int base = 10; + + while (isdigit ((int) **pp)) { + + temp = lhs * base; + lhs = (temp + (*((*pp)++) - '0')); + + } + + } + + return lhs; + + } + + if (is_name_beginner ((int) **pp)) { + + char *sname, *caret; + + struct hashtab_name *key; + struct macro *m; + + caret = *pp; + + while (is_name_part ((int) *caret)) { + caret++; + } + + if (memcmp (*pp, "defined", caret - *pp) == 0) { + + caret = skip_whitespace (caret); + *pp = caret; + + if (*caret == '(') { + + caret = skip_whitespace (caret + 1); + *pp = caret; + + while (!is_end_of_line[(int) *caret]) { + + if (isspace ((int) *caret) || *caret == ')') { break; } + caret++; + + } + + sname = xstrndup (*pp, caret - *pp); + caret = skip_whitespace (caret); + + if (*caret != ')') { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "missing ')' after \"defined\""); + } + + } else { 
+ sname = xstrndup (*pp, caret - *pp); + } + + if (*caret == ')') { caret++; } + *pp = skip_whitespace (caret); + + (void) sname; + (void) key; + + lhs = (find_macro (sname) != NULL); + return lhs; + + } + + sname = xstrndup (*pp, caret - *pp); + *pp = skip_whitespace (caret); + + if ((key = find_macro (sname))) { + + if ((m = get_macro (key))) { + + char *temp = process_macro (start, pp, m); + lhs = eval_unary (lhs, temp, &temp); + + } + + } + + return lhs; + + } + + if (**pp == '@') { + + char *arg, *temp = (*pp + 1); + + if ((arg = symname (&temp))) { + + if (xstrcasecmp (arg, "DataSize") == 0) { + + *pp = temp; + free (arg); + + return state->data_size; + + } + + if (xstrcasecmp (arg, "Model") == 0) { + + *pp = temp; + free (arg); + + return (state->model > 0 ? state->model : 1); + + } + + free (arg); + + } + + } + + if (**pp == '!') { + + unsigned int temp = 0; + + *pp = skip_whitespace (*pp + 1); + temp = eval_unary (temp, start, pp); + + lhs = (temp == 0); + return lhs; + + } + + if (**pp == '~') { + + int flip_bits = 0; + + while (**pp == '~') { + + flip_bits = !flip_bits; + *pp = skip_whitespace (*pp + 1); + + } + + lhs = eval_unary (lhs, start, pp); + + if (flip_bits) { + lhs = ~lhs; + } + + return lhs; + + } + + if (**pp == '-') { + + int sign = 1; + + while (**pp == '-' || **pp == '+') { + + if (**pp == '-') { sign = !sign; } + *pp = skip_whitespace (*pp + 1); + + } + + lhs = eval_unary (lhs, start, pp); + + if (!sign) { + lhs = -lhs; + } + + return lhs; + + } + + if (**pp == '(') { + + char *caret = (*pp)++; + int depth = 0; + + while (!is_end_of_line[(int) **pp]) { + + if (**pp == '(') { + + (*pp)++; + + depth++; + continue; + + } + + if (**pp == ')') { + + if (depth > 0) { + + (*pp)++; + + depth--; + continue; + + } + + break; + + } + + (*pp)++; + + } + + if (**pp != ')') { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, caret, "missing ')' in expression"); + } else { + (*pp)++; + } + + sprintf (caret, "%.*s", (int) (*pp 
- caret - 1), caret + 1); + + lhs = eval_unary (lhs, start, &caret); + lhs = eval_expr (lhs, start, &caret, 15); + + return lhs; + + } + + if (!is_end_of_line[(int) **pp]) { + + report_line_at (get_filename (), get_line_number (), REPORT_INTERNAL_ERROR, start, *pp, "unexpected %c character", **pp); + + while (!is_end_of_line[(int) **pp]) { + (*pp)++; + } + + } + + return lhs; + +} + +#define OP_MUL 0 +#define OP_DIV 1 +#define OP_MOD 2 +#define OP_PLUS 3 +#define OP_MINUS 4 +#define OP_LT 5 +#define OP_GT 6 +#define OP_LTEQ 7 +#define OP_GTEQ 8 +#define OP_EQEQ 9 +#define OP_NOTEQ 11 +#define OP_AND 12 +#define OP_XOR 13 +#define OP_OR 14 +#define OP_ANDAND 15 +#define OP_OROR 16 +#define OP_LSHIFT 17 +#define OP_RSHIFT 18 +#define OP_QUEST 19 +#define OP_MAX 20 + +struct op { + + char *word; + int kind; + +}; + +static struct op *get_op (char **pp) { + + static struct op kws[] = { + + { "<=", OP_LTEQ }, + { ">=", OP_GTEQ }, + { "==", OP_EQEQ }, + { "!=", OP_NOTEQ }, + { "&&", OP_ANDAND }, + { "||", OP_OROR }, + { "<<", OP_LSHIFT }, + { ">>", OP_RSHIFT }, + + { "*", OP_MUL }, + { "/", OP_DIV }, + { "%", OP_MOD }, + { "+", OP_PLUS }, + { "-", OP_MINUS }, + { "<", OP_LT }, + { ">", OP_GT }, + { "&", OP_AND }, + { "^", OP_XOR }, + { "|", OP_OR }, + { "?", OP_QUEST } + + }; + + struct op *kw; + unsigned int i; + + for (i = 0; i < (sizeof (kws) / sizeof (*kws)); i++) { + + kw = &kws[i]; + + if (strncmp (*pp, kw->word, strlen (kw->word)) == 0) { + + *pp += strlen (kw->word); + return kw; + + } + + } + + return 0; + +} + +static int get_prec (int kind) { + + switch (kind) { + + case OP_MUL: case OP_DIV: case OP_MOD: + + return 3; + + case OP_PLUS: case OP_MINUS: + + return 4; + + case OP_LSHIFT: case OP_RSHIFT: + + return 5; + + case OP_LT: case OP_GT: case OP_LTEQ: case OP_GTEQ: + + return 6; + + case OP_EQEQ: case OP_NOTEQ: + + return 7; + + case OP_AND: + + return 8; + + case OP_XOR: + + return 9; + + case OP_OR: + + return 10; + + case OP_ANDAND: + + return 11; + + 
case OP_OROR: + + return 12; + + case OP_QUEST: + + return 13; + + default: + + break; + + } + + return 100; + +} + +static unsigned int eval_expr (unsigned int lhs, char *start, char **pp, int outer_prec) { + + struct op *op1, *op2; + unsigned int rhs; + + int prec, look_ahead; + + for (;;) { + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp]) { + break; + } + + op1 = get_op (pp); + + if (!op1 || (prec = get_prec (op1->kind)) > outer_prec) { + + if (op1) { *pp -= strlen (op1->word); } + break; + + } + + *pp = skip_whitespace (*pp); + + if (op1->kind == OP_QUEST) { + + unsigned int left = 0, right = 0; + + left = eval_unary (left, start, pp); + left = eval_expr (left, start, pp, 14); + + *pp = skip_whitespace (*pp); + assert (**pp == ':'); + *pp = skip_whitespace (*pp + 1); + + right = eval_unary (right, start, pp); + right = eval_expr (right, start, pp, 14); + + if (lhs != 0) { + lhs = left; + } else { + lhs = right; + } + + continue; + + } + + if (is_end_of_line[(int) **pp]) { + break; + } + + rhs = 0; + rhs = eval_unary (rhs, start, pp); + + for (;;) { + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp]) { + break; + } + + op2 = get_op (pp); + + if (!op2 || (look_ahead = get_prec (op2->kind)) > prec) { + + if (op2) { *pp -= strlen (op2->word); } + break; + + } + + *pp = skip_whitespace (*pp); + rhs = eval_expr (rhs, start, pp, look_ahead); + + } + + switch (op1->kind) { + + case OP_MUL: + + lhs *= rhs; + break; + + case OP_DIV: + + lhs /= rhs; + break; + + case OP_MOD: + + lhs %= rhs; + break; + + case OP_PLUS: + + lhs += rhs; + break; + + case OP_MINUS: + + lhs -= rhs; + break; + + case OP_LT: + + lhs = (lhs < rhs); + break; + + case OP_GT: + + lhs = (lhs > rhs); + break; + + case OP_LTEQ: + + lhs = (lhs <= rhs); + break; + + case OP_GTEQ: + + lhs = (lhs >= rhs); + break; + + case OP_EQEQ: + + lhs = (lhs == rhs); + break; + + case OP_NOTEQ: + + lhs = (lhs != rhs); + break; + + case OP_AND: + + lhs &= rhs; + break; + + case 
OP_XOR: + + lhs ^= rhs; + break; + + case OP_OR: + + lhs |= rhs; + break; + + case OP_ANDAND: + + lhs = ((lhs != 0) && (rhs != 0)); + break; + + case OP_OROR: + + lhs = ((lhs != 0) || (rhs != 0)); + break; + + case OP_LSHIFT: + + lhs <<= rhs; + break; + + case OP_RSHIFT: + + lhs >>= rhs; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "unimplemented"); + break; + + } + + } + + return lhs; + +} + +int eval (char *start, char **pp) { + + unsigned int lhs = 0; + + lhs = eval_unary (lhs, start, pp); + lhs = eval_expr (lhs, start, pp, 15); + + return (lhs != 0); + +} diff --git a/eval.h b/eval.h new file mode 100644 index 0000000..9361938 --- /dev/null +++ b/eval.h @@ -0,0 +1,9 @@ +/****************************************************************************** + * @file eval.h + *****************************************************************************/ +#ifndef _EVAL_H +#define _EVAL_H + +int eval (char *start, char **pp); + +#endif /* _EVAL_H */ diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..6e944d9 --- /dev/null +++ b/expr.c @@ -0,0 +1,1395 @@ +/****************************************************************************** + * @file expr.c + *****************************************************************************/ +#include +#include +#include + +#include "expr.h" +#include "frag.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +/** + * Expression symbols are mapped to file positions to provide + * better error messages. 
+ */ +struct expr_symbol_line { + + struct symbol *symbol; + + const char *filename; + unsigned long line_number; + + struct expr_symbol_line *next; + +}; + +static struct expr_symbol_line *expr_symbol_lines = 0; + +static char *read_character (const char *start, char *p, unsigned long *ch) { + + if (*p == '\\') { + + p++; + + if (*p == '\'') { + + *ch = '\''; + p++; + + } else if (*p == '\"') { + + *ch = '"'; + p++; + + } else if (*p == '\\') { + + *ch = '\\'; + p++; + + } else if (*p == 'a') { + + *ch = 0x07; + p++; + + } else if (*p == 'b') { + + *ch = 0x08; + p++; + + } else if (*p == 't') { + + *ch = 0x09; + p++; + + } else if (*p == 'n') { + + *ch = 0x0a; + p++; + + } else if (*p == 'v') { + + *ch = 0x0b; + p++; + + } else if (*p == 'f') { + + *ch = 0x0c; + p++; + + } else if (*p == 'r') { + + *ch = 0x0d; + p++; + + } else if (*p == 'e') { + + *ch = 0x1b; + p++; + + } else if (*p >= '0' && *p <= '7') { + + unsigned long i = 0; + *ch = 0; + + while (*p >= '0' && *p <= '7') { + + if (++i > 3) { + break; + } + + *ch = *ch * 8 + (*p - '0'); + p++; + + } + + } else { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, p - 1, "unknown escape sequence: '\\%c'", *p); + *ch = *p++; + + } + + } else { + *ch = *p++; + } + + return p; + +} + +static int chrpos (char *s, int ch) { + + char *p = strchr (s, ch); + return p ? 
p - s : -1; + +} + +static void integer_constant (char *start, char **pp, struct expr *expr, int radix) { + + long value = 0; + int k; + + while (!is_end_of_line[(int) **pp] && (k = chrpos ("0123456789abcdef", tolower ((int) **pp))) >= 0) { + + if (k >= radix) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "invalid digit in integer literal"); + } + + value = value * radix + k; + (*pp)++; + + } + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = value; + +} + +enum expr_type machine_dependent_parse_operator (char **pp, char *name, char *original_saved_c, unsigned int operands); + +int machine_dependent_need_index_operator (void); +int machine_dependent_parse_name (char **pp, struct expr *expr, char *name, char *original_saved_c); + +void machine_dependent_parse_operand (char *start, char **pp, struct expr *expr); + +static enum expr_type operator (char *p, unsigned int *operator_size) { + + char *start; + enum expr_type ret; + + if (is_name_beginner ((int) *p)) { + + char *name; + char c; + + name = p; + + c = get_symbol_name_end (&p); + ret = machine_dependent_parse_operator (&p, name, &c, 2); + + switch (ret) { + + case EXPR_TYPE_ABSENT: + + *p = c; + + p = name; + break; + + default: + + *p = c; + *operator_size = p - name; + + return ret; + + } + + } + + switch (*p) { + + case '+': + + *operator_size = 1; + return EXPR_TYPE_ADD; + + case '-': + + *operator_size = 1; + return EXPR_TYPE_SUBTRACT; + + case '<': + + switch (p[1]) { + + case '<': + + *operator_size = 2; + return EXPR_TYPE_LEFT_SHIFT; + + case '>': + + *operator_size = 2; + return EXPR_TYPE_NOT_EQUAL; + + case '=': + + *operator_size = 2; + return EXPR_TYPE_LESSER_EQUAL; + + } + + *operator_size = 1; + return EXPR_TYPE_LESSER; + + case '>': + + switch (p[1]) { + + case '>': + + *operator_size = 2; + return EXPR_TYPE_RIGHT_SHIFT; + + case '=': + + *operator_size = 2; + return EXPR_TYPE_GREATER_EQUAL; + + } + + *operator_size = 1; + return EXPR_TYPE_GREATER; + + 
/**
 * Parses one operand (a primary expression, optionally preceded by unary
 * operators) at *pp and stores the result in expr.
 *
 * @param start      Forwarded to diagnostics and nested parsers.
 *                   NOTE(review): presumably the start of the current source line -- confirm.
 * @param pp         In/out cursor; left after the operand and any trailing whitespace.
 * @param expr       Output; all four fields are reset before parsing starts.
 * @param expr_mode  Forwarded unchanged to nested operand()/read_into() calls.
 *
 * @return For a parenthesised/bracketed sub-expression, whatever read_into()
 *         returned.  Otherwise the section of expr->add_symbol for
 *         EXPR_TYPE_SYMBOL, reg_section for EXPR_TYPE_REGISTER and
 *         absolute_section for every other result type.
 */
static struct section *operand (char *start, char **pp, struct expr *expr, int expr_mode) {

    struct section *ret_section;
    char ch;

    expr->type = EXPR_TYPE_INVALID;
    expr->add_number = 0;

    expr->add_symbol = 0;
    expr->op_symbol = 0;

    *pp = skip_whitespace (*pp);

    /* NOTE(review): **pp is a plain char; where char is signed, bytes >= 0x80 give a negative index here -- confirm the table or the callers rule this out. */
    if (is_end_of_line[(int) **pp]) {
        goto end_of_line;
    }

    switch (**pp) {

        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':

            integer_constant (start, pp, expr, 10);
            break;

        case '0':

            /* A leading '0' selects the radix from the character that follows it. */
            (*pp)++;

            switch (**pp) {

                case 'X':
                case 'x':

                    (*pp)++;

                    integer_constant (start, pp, expr, 16);
                    break;

                /* NOTE(review): this list accepts '8' and omits '0' as the digit after the leading zero; confirm that is intended for radix 8. */
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':

                    integer_constant (start, pp, expr, 8);
                    break;

                case 'B':
                case 'b':

                    /* "0b" only introduces a binary constant when a binary digit follows it. */
                    if ((*pp)[1] == '0' || (*pp)[1] == '1') {

                        (*pp)++;

                        integer_constant (start, pp, expr, 2);
                        break;

                    }

                    /* fall through */

                default:

                    /* Just the '0' that was already consumed: the constant zero.  *pp is not advanced any further. */
                    expr->type = EXPR_TYPE_CONSTANT;
                    expr->add_number = 0;

                    break;

            }

            break;

        case '[':

            /* When the target asks for an index operator, '[' is not treated as a grouping bracket and goes down the default path. */
            if (machine_dependent_need_index_operator ()) {
                goto default_;
            }

            /* fall through */

        case '(':

            /* Remembers which bracket opened the group and steps past it. */
            ch = (*pp)++[0];
            ret_section = read_into (start, pp, expr, 0, expr_mode);

            if ((ch == '(' && (*pp)[0] != ')') || (ch == '[' && (*pp)[0] != ']')) {

                if ((*pp)[0]) {
                    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "found '%c' but expected '%c'", (*pp)[0], (ch == '(' ? ')' : ']'));
                } else {
                    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "missing '%c'", (ch == '(' ? ')' : ']'));
                }

            } else {
                (*pp)++;
            }

            /* The group returns directly, bypassing the section selection at the bottom of the function. */
            *pp = skip_whitespace (*pp);
            return ret_section;

        case '+':
        case '-':
        case '~':
        case '!':

            ch = *((*pp)++);

        /* Also entered by goto from the name path below, with ch set to the equivalent punctuation operator. */
        unary:

            operand (start, pp, expr, expr_mode);

            if (expr->type == EXPR_TYPE_CONSTANT) {

                /* Constants are folded immediately; unary '+' leaves the value untouched. */
                switch (ch) {

                    case '-':

                        expr->add_number = -expr->add_number;
                        break;

                    case '~':

                        expr->add_number = ~expr->add_number;
                        break;

                    case '!':

                        expr->add_number = !expr->add_number;
                        break;

                }

            } else if (expr->type != EXPR_TYPE_INVALID && expr->type != EXPR_TYPE_ABSENT) {

                if (ch != '+') {

                    /* Non-constant operand: wrap it in an expression symbol and record the unary operation as the new expression type. */
                    expr->add_symbol = make_expr_symbol (expr);
                    expr->op_symbol = 0;

                    expr->add_number = 0;

                    switch (ch) {

                        case '-':

                            expr->type = EXPR_TYPE_UNARY_MINUS;
                            break;

                        case '~':

                            expr->type = EXPR_TYPE_BIT_NOT;
                            break;

                        case '!':

                            expr->type = EXPR_TYPE_LOGICAL_NOT;
                            break;

                    }

                }

            } else {
                report_at (get_filename (), get_line_number (), REPORT_WARNING, "unary operator %c ignored because bad operand follows", ch);
            }

            break;

        case '\'':

            /* Character constant; a closing quote is skipped when present but is not required. */
            *pp = read_character (start, ++(*pp), &expr->add_number);

            if (**pp == '\'') {
                (*pp)++;
            }

            expr->type = EXPR_TYPE_CONSTANT;
            break;

        case '.':

            /* A '.' that does not start a longer name stands for the current location. */
            if (!is_name_part ((int) (*pp)[1])) {

                current_location (expr);

                (*pp)++;
                break;

            } else {
                goto is_name;
            }

        default:
        default_:

            if (is_name_beginner ((int) **pp)) {

                struct symbol *symbol;
                char *name;

            is_name:

                /* ch receives the character returned by get_symbol_name_end() and is written back to **pp on every path below (presumably restoring a terminator the helper overwrote -- confirm). */
                name = *pp;
                ch = get_symbol_name_end (pp);

                /* Checks in a machine-dependent way whether the name is a unary operator. */
                {

                    enum expr_type ret = machine_dependent_parse_operator (pp, name, &ch, 1);

                    switch (ret) {

                        case EXPR_TYPE_UNARY_MINUS:

                            **pp = ch;

                            ch = '-';
                            goto unary;

                        case EXPR_TYPE_BIT_NOT:

                            **pp = ch;

                            ch = '~';
                            goto unary;

                        case EXPR_TYPE_LOGICAL_NOT:

                            **pp = ch;

                            ch = '!';
                            goto unary;

                        case EXPR_TYPE_INVALID:

                            report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of operator \"%s\"", name);
                            break;

                        default:

                            break;

                    }

                    /* Any other operator type the target recognised: parse its operand at rank 9 and wrap it in an expression symbol typed with that operator. */
                    if (ret != EXPR_TYPE_INVALID && ret != EXPR_TYPE_ABSENT) {

                        **pp = ch;
                        read_into (start, pp, expr, 9, expr_mode);

                        expr->add_symbol = make_expr_symbol (expr);

                        expr->add_number = 0;
                        expr->op_symbol = 0;

                        expr->type = ret;
                        break;

                    }

                }

                /* Gives the target the chance to claim the name before it becomes a symbol. */
                if (machine_dependent_parse_name (pp, expr, name, &ch)) {

                    **pp = ch;
                    break;

                }

                symbol = symbol_find_or_make (name, SYMBOL_SCOPE_LOCAL);
                **pp = ch;

                /* Symbols in the absolute section are folded to constants unless a relocation is forced for them. */
                if (symbol_get_section (symbol) == absolute_section && !symbol_force_reloc (symbol)) {

                    expr->type = EXPR_TYPE_CONSTANT;
                    expr->add_number = symbol_get_value (symbol);

                } else {

                    expr->type = EXPR_TYPE_SYMBOL;
                    expr->add_symbol = symbol;

                    expr->add_number = 0;

                }

            } else {

                /* Not a name: the target parser is tried last.  If it leaves the type as ABSENT an error is reported, one character is skipped and zero is assumed. */
                expr->type = EXPR_TYPE_ABSENT;
                machine_dependent_parse_operand (start, pp, expr);

                if (expr->type == EXPR_TYPE_ABSENT) {

                    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "bad expression");
                    (*pp)++;

                    expr->type = EXPR_TYPE_CONSTANT;
                    expr->add_number = 0;

                }

            }

            break;

        case ',':
        end_of_line:

            /* Nothing to parse before a ',' or the end of the line; neither is consumed. */
            expr->type = EXPR_TYPE_ABSENT;
            break;

    }

    *pp = skip_whitespace (*pp);

    switch (expr->type) {

        case EXPR_TYPE_SYMBOL:

            return symbol_get_section (expr->add_symbol);

        case EXPR_TYPE_REGISTER:

            return reg_section;

        default:

            return absolute_section;

    }

}
*/ + +}; + +void expr_type_set_rank (enum expr_type expr_type, unsigned int rank) { + op_rank_table[expr_type] = rank; +} + +struct section *current_location (struct expr *expr) { + + expr->type = EXPR_TYPE_SYMBOL; + expr->add_number = 0; + + expr->add_symbol = symbol_temp_new_now (); + expr->op_symbol = 0; + + return symbol_get_section (expr->add_symbol); + +} + +struct section *read_into (char *start, char **pp, struct expr *expr, unsigned int rank, int expr_mode) { + + enum expr_type left_op; + struct expr right_expr; + + struct section *ret_section; + unsigned int operator_size; + + ret_section = operand (start, pp, expr, expr_mode); + left_op = operator (*pp, &operator_size); + + while (left_op != EXPR_TYPE_INVALID && op_rank_table[left_op] > rank) { + + enum expr_type right_op; + + struct section *right_section; + signed long offset; + + *pp += operator_size; + right_section = read_into (start, pp, &right_expr, op_rank_table[left_op], expr_mode); + + if (right_expr.type == EXPR_TYPE_ABSENT) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "missing operand; zero assumed"); + + right_expr.type = EXPR_TYPE_CONSTANT; + right_expr.add_number = 0; + + right_expr.add_symbol = 0; + right_expr.op_symbol = 0; + + } + + if (left_op == EXPR_TYPE_INDEX) { + + if (**pp != ']') { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "missing ']'"); + } else { + *pp = skip_whitespace (*pp + 1); + } + + } + + right_op = operator (*pp, &operator_size); + + if (left_op == EXPR_TYPE_ADD && right_expr.type == EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_REGISTER) { + expr->add_number += right_expr.add_number; + } else if (left_op == EXPR_TYPE_SUBTRACT && right_expr.type == EXPR_TYPE_SYMBOL && expr->type == EXPR_TYPE_SYMBOL && ret_section == right_section && ((SECTION_IS_NORMAL (ret_section) && !symbol_force_reloc (expr->add_symbol) && !symbol_force_reloc (right_expr.add_symbol)) || expr->add_symbol == right_expr.add_symbol) && 
frags_offset_is_fixed (symbol_get_frag (expr->add_symbol), symbol_get_frag (right_expr.add_symbol), &offset)) { + + expr->add_number += symbol_get_value (expr->add_symbol) - symbol_get_value (right_expr.add_symbol); + expr->add_number -= right_expr.add_number; + expr->add_number -= offset; + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_symbol = 0; + + } else if (left_op == EXPR_TYPE_SUBTRACT && right_expr.type == EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_REGISTER) { + expr->add_number -= right_expr.add_number; + } else if (left_op == EXPR_TYPE_ADD && expr->type == EXPR_TYPE_CONSTANT && right_expr.type != EXPR_TYPE_REGISTER) { + + expr->type = right_expr.type; + + expr->add_symbol = right_expr.add_symbol; + expr->op_symbol = right_expr.op_symbol; + + expr->add_number += right_expr.add_number; + ret_section = right_section; + + } else if (expr->type == EXPR_TYPE_CONSTANT && right_expr.type == EXPR_TYPE_CONSTANT) { + + /* Checks for division by zero. */ + if ((left_op == EXPR_TYPE_DIVIDE || left_op == EXPR_TYPE_MODULUS) && right_expr.add_number == 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "division by zero"); + right_expr.add_number = 1; + + } + + switch (left_op) { + + case EXPR_TYPE_LOGICAL_OR: + + expr->add_number = expr->add_number || right_expr.add_number; + break; + + case EXPR_TYPE_LOGICAL_AND: + + expr->add_number = expr->add_number && right_expr.add_number; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + expr->add_number = (expr->add_number == right_expr.add_number) ? ~(signed long) 0 : 0; + + if (left_op == EXPR_TYPE_NOT_EQUAL) { + expr->add_number = ~expr->add_number; + } + + break; + + case EXPR_TYPE_LESSER: + + expr->add_number = (signed long) expr->add_number < (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + expr->add_number = (signed long) expr->add_number <= (signed long) right_expr.add_number ? 
~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + expr->add_number = (signed long) expr->add_number > (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + expr->add_number = (signed long) expr->add_number >= (signed long) right_expr.add_number ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + expr->add_number += right_expr.add_number; + break; + + case EXPR_TYPE_SUBTRACT: + + expr->add_number -= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + expr->add_number |= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + expr->add_number ^= right_expr.add_number; + break; + + case EXPR_TYPE_BIT_AND: + + expr->add_number &= right_expr.add_number; + break; + + case EXPR_TYPE_MULTIPLY: + + expr->add_number *= right_expr.add_number; + break; + + case EXPR_TYPE_DIVIDE: + + expr->add_number /= right_expr.add_number; + break; + + case EXPR_TYPE_MODULUS: + + expr->add_number %= right_expr.add_number; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + expr->add_number = (signed long) (expr->add_number) << (signed long) (right_expr.add_number); + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + expr->add_number = (unsigned long) (expr->add_number) >> (unsigned long) (right_expr.add_number); + break; + + default: + + goto general_case; + + } + + } else if (expr->type == EXPR_TYPE_SYMBOL && right_expr.type == EXPR_TYPE_SYMBOL && (left_op == EXPR_TYPE_ADD || left_op == EXPR_TYPE_SUBTRACT || (expr->add_number == 0 && right_expr.add_number == 0))) { + + expr->type = left_op; + expr->op_symbol = right_expr.add_symbol; + + if (left_op == EXPR_TYPE_ADD) { + expr->add_number += right_expr.add_number; + } else if (left_op == EXPR_TYPE_SUBTRACT) { + + expr->add_number -= right_expr.add_number; + + if (ret_section == right_section && SECTION_IS_NORMAL (ret_section) && !symbol_force_reloc (expr->add_symbol) && !symbol_force_reloc (right_expr.add_symbol)) { + ret_section = right_section 
= absolute_section; + } + + } + + } else { + + general_case: + + expr->add_symbol = make_expr_symbol (expr); + expr->add_number = 0; + + expr->op_symbol = make_expr_symbol (&right_expr); + expr->type = left_op; + + } + + if (ret_section != right_section) { + + if (ret_section == undefined_section) { + /* Nothing is done. */ + } else if (right_section == undefined_section) { + ret_section = right_section; + } else if (ret_section == expr_section) { + /* Nothing is done. */ + } else if (right_section == expr_section) { + ret_section = right_section; + } else if (ret_section == reg_section) { + /* Nothing is done. */ + } else if (right_section == reg_section) { + ret_section = right_section; + } else if (right_section == absolute_section) { + /* Nothing is done. */ + } else if (ret_section == absolute_section) { + ret_section = right_section; + } else if (left_op == EXPR_TYPE_SUBTRACT) { + /* Nothing is done. */ + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "operation combines symbols in different sections"); + } + + } + + left_op = right_op; + + } + + if (rank == 0 && expr_mode == EXPR_MODE_EVALUATE) { + resolve_expression (expr); + } + + *pp = skip_whitespace (*pp); + return (expr->type == EXPR_TYPE_CONSTANT ? absolute_section : ret_section); + +} + +struct symbol *make_expr_symbol (struct expr *expr) { + + struct expr_symbol_line *es_line; + struct symbol *symbol; + + if (expr->type == EXPR_TYPE_SYMBOL && expr->add_number == 0) { + return expr->add_symbol; + } + + symbol = symbol_create (FAKE_LABEL_NAME, (expr->type == EXPR_TYPE_CONSTANT ? absolute_section : (expr->type == EXPR_TYPE_REGISTER ? 
reg_section : expr_section)), 0, &zero_address_frag); + symbol_set_value_expression (symbol, expr); + + es_line = xmalloc (sizeof (*es_line)); + + es_line->symbol = symbol; + get_filename_and_line_number (&(es_line->filename), &(es_line->line_number)); + + es_line->next = expr_symbol_lines; + expr_symbol_lines = es_line; + + return symbol; + +} + +signed long absolute_expression_read_into (char *start, char **pp, struct expr *expr) { + + expression_evaluate_and_read_into (start, pp, expr); + + if (expr->type != EXPR_TYPE_CONSTANT) { + + if (expr->type != EXPR_TYPE_ABSENT) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "bad or irreducible absolute expression"); + } + + expr->add_number = 0; + + } + + return expr->add_number; + +} + +int expr_symbol_get_filename_and_line_number (struct symbol *symbol, const char **filename_p, unsigned long *line_number_p) { + + struct expr_symbol_line *es_line; + + for (es_line = expr_symbol_lines; es_line; es_line = es_line->next) { + + if (symbol == es_line->symbol) { + + *filename_p = es_line->filename; + *line_number_p = es_line->line_number; + + return 0; + + } + + } + + return 1; + +} + +signed long get_result_of_absolute_expression (char *start, char **pp) { + + struct expr expr; + return absolute_expression_read_into (start, pp, &expr); + +} + +int resolve_expression (struct expr *expr) { + + struct symbol *op_symbol = expr->op_symbol; + struct frag *left_frag, *right_frag; + + struct symbol *add_symbol = expr->add_symbol; + struct symbol *original_add_symbol = add_symbol; + + unsigned long final_value = expr->add_number; + unsigned long left_value, right_value; + + struct section *left_section, *right_section; + signed long frag_offset; + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + case EXPR_TYPE_REGISTER: + + left_value = 0; + break; + + case EXPR_TYPE_SYMBOL: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag)) { + return 0; + } + + break; + + case 
EXPR_TYPE_LOGICAL_NOT: + case EXPR_TYPE_BIT_NOT: + case EXPR_TYPE_UNARY_MINUS: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag)) { + return 0; + } + + if (left_section != absolute_section) { + return 0; + } + + switch (expr->type) { + + case EXPR_TYPE_LOGICAL_NOT: + + left_value = !left_value; + break; + + case EXPR_TYPE_BIT_NOT: + + left_value = ~left_value; + break; + + case EXPR_TYPE_UNARY_MINUS: + + left_value = -left_value; + break; + + default: + + break; + + } + + expr->type = EXPR_TYPE_CONSTANT; + break; + + case EXPR_TYPE_LOGICAL_OR: + case EXPR_TYPE_LOGICAL_AND: + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + case EXPR_TYPE_LESSER: + case EXPR_TYPE_LESSER_EQUAL: + case EXPR_TYPE_GREATER: + case EXPR_TYPE_GREATER_EQUAL: + case EXPR_TYPE_ADD: + case EXPR_TYPE_SUBTRACT: + case EXPR_TYPE_BIT_INCLUSIVE_OR: + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + case EXPR_TYPE_BIT_AND: + case EXPR_TYPE_MULTIPLY: + case EXPR_TYPE_DIVIDE: + case EXPR_TYPE_MODULUS: + case EXPR_TYPE_LEFT_SHIFT: + case EXPR_TYPE_RIGHT_SHIFT: + + if (get_symbol_snapshot (&add_symbol, &left_value, &left_section, &left_frag) || get_symbol_snapshot (&op_symbol, &right_value, &right_section, &right_frag)) { + return 0; + } + + if (expr->type == EXPR_TYPE_ADD) { + + if (right_section == absolute_section) { + + final_value += right_value; + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (left_section == absolute_section) { + + final_value += left_value; + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + expr->type = EXPR_TYPE_SYMBOL; + + break; + + } + + } else if (expr->type == EXPR_TYPE_SUBTRACT) { + + if (right_section == absolute_section) { + + final_value -= right_value; + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } + + } + + frag_offset = 0; + + /** + * Equality and non-equality operations are allowed on everything. 
+ * Subtraction and other comparison operators are allowed if both operands are in the same section. + * Bit OR, bit AND and multiplications are permitted with constant zero are permitted on anything. + * Shifts by constant zero are permitted on anything. + * Multiplication and division by constant one are permitted on anything. + * Bit OR and AND are permitted on two same undefined symbols. + * For everything else, both operands must be absolute. + * Addition and subtraction of constants is handled above. + */ + if (!(left_section == absolute_section && + right_section == absolute_section) && + !(expr->type == EXPR_TYPE_EQUAL || + expr->type == EXPR_TYPE_NOT_EQUAL) && + !((expr->type == EXPR_TYPE_SUBTRACT || + expr->type == EXPR_TYPE_LESSER || + expr->type == EXPR_TYPE_LESSER_EQUAL || + expr->type == EXPR_TYPE_GREATER || + expr->type == EXPR_TYPE_GREATER_EQUAL) && + left_section == right_section && + (finalize_symbols || + frags_offset_is_fixed (left_frag, right_frag, &frag_offset) || + (expr->type == EXPR_TYPE_GREATER && + frags_is_greater_than_offset (left_value, left_frag, right_value, right_frag, &frag_offset))) && + (left_section != undefined_section || + add_symbol == op_symbol))) + { + + if ((left_section == absolute_section && left_value == 0) || (right_section == absolute_section && right_value == 0)) { + + if (expr->type == EXPR_TYPE_BIT_INCLUSIVE_OR || expr->type == EXPR_TYPE_BIT_EXCLUSIVE_OR) { + + if (!(right_section == absolute_section && right_value == 0)) { + + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + + } + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (expr->type == EXPR_TYPE_LEFT_SHIFT || expr->type == EXPR_TYPE_RIGHT_SHIFT) { + + if (!(left_section == absolute_section && left_value == 0)) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } + + } else if (expr->type != EXPR_TYPE_BIT_AND && expr->type != EXPR_TYPE_MULTIPLY) { + return 0; + } + + } 
else if (expr->type == EXPR_TYPE_MULTIPLY && left_section == absolute_section && left_value == 1) { + + left_value = right_value; + + left_section = right_section; + add_symbol = op_symbol; + + original_add_symbol = expr->op_symbol; + break; + + } else if ((expr->type == EXPR_TYPE_MULTIPLY || expr->type == EXPR_TYPE_DIVIDE) && right_section == absolute_section && right_value == 1) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (!(left_value == right_value && ((left_section == reg_section && right_section == reg_section) || (left_section == undefined_section && right_section == undefined_section && add_symbol == op_symbol)))) { + return 0; + } else if (expr->type == EXPR_TYPE_BIT_INCLUSIVE_OR || expr->type == EXPR_TYPE_BIT_AND) { + + expr->type = EXPR_TYPE_SYMBOL; + break; + + } else if (expr->type != EXPR_TYPE_BIT_EXCLUSIVE_OR) { + return 0; + } + + } + + right_value += frag_offset; + + switch (expr->type) { + + case EXPR_TYPE_LOGICAL_OR: + + left_value = left_value || right_value; + break; + + case EXPR_TYPE_LOGICAL_AND: + + left_value = left_value && right_value; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + left_value = ((left_value == right_value && left_section == right_section && (finalize_symbols || left_frag == right_frag) && (left_section != undefined_section || add_symbol == op_symbol)) ? ~(unsigned long) 0 : 0); + + if (expr->type == EXPR_TYPE_NOT_EQUAL) { + left_value = ~left_value; + } + + break; + + case EXPR_TYPE_LESSER: + + left_value = (signed long) left_value < (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + left_value = (signed long) left_value <= (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + left_value = (signed long) left_value > (signed long) right_value ? ~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + left_value = (signed long) left_value >= (signed long) right_value ? 
~(unsigned long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + left_value += right_value; + break; + + case EXPR_TYPE_SUBTRACT: + + left_value -= right_value; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + left_value |= right_value; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + left_value ^= right_value; + break; + + case EXPR_TYPE_BIT_AND: + + left_value &= right_value; + break; + + case EXPR_TYPE_MULTIPLY: + + left_value *= right_value; + break; + + case EXPR_TYPE_DIVIDE: + + if (right_value == 0) { + return 0; + } + + left_value = (signed long) left_value / (signed long) right_value; + break; + + case EXPR_TYPE_MODULUS: + + if (right_value == 0) { + return 0; + } + + left_value = (signed long) left_value % (signed long) right_value; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + left_value = (unsigned long) left_value << (unsigned long) right_value; + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = (unsigned long) left_value >> (unsigned long) right_value; + break; + + default: + + break; + + } + + expr->type = EXPR_TYPE_CONSTANT; + break; + + default: + + return 0; + + } + + if (expr->type == EXPR_TYPE_SYMBOL) { + + if (left_section == absolute_section) { + expr->type = EXPR_TYPE_CONSTANT; + } else if (left_section == reg_section && final_value == 0) { + expr->type = EXPR_TYPE_REGISTER; + } else if (add_symbol != original_add_symbol) { + final_value += left_value; + } + + expr->add_symbol = add_symbol; + + } + + if (expr->type == EXPR_TYPE_CONSTANT) { + final_value += left_value; + } + + expr->add_number = final_value; + return 1; + +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..eb99d62 --- /dev/null +++ b/expr.h @@ -0,0 +1,87 @@ +/****************************************************************************** + * @file expr.h + *****************************************************************************/ +#ifndef _EXPR_H +#define _EXPR_H + +enum expr_type { + + EXPR_TYPE_INVALID, + EXPR_TYPE_ABSENT, + EXPR_TYPE_CONSTANT, + 
EXPR_TYPE_SYMBOL, + EXPR_TYPE_REGISTER, + EXPR_TYPE_INDEX, + EXPR_TYPE_LOGICAL_OR, + EXPR_TYPE_LOGICAL_AND, + EXPR_TYPE_EQUAL, + EXPR_TYPE_NOT_EQUAL, + EXPR_TYPE_LESSER, + EXPR_TYPE_LESSER_EQUAL, + EXPR_TYPE_GREATER, + EXPR_TYPE_GREATER_EQUAL, + EXPR_TYPE_ADD, + EXPR_TYPE_SUBTRACT, + EXPR_TYPE_BIT_INCLUSIVE_OR, + EXPR_TYPE_BIT_EXCLUSIVE_OR, + EXPR_TYPE_BIT_AND, + EXPR_TYPE_MULTIPLY, + EXPR_TYPE_DIVIDE, + EXPR_TYPE_MODULUS, + EXPR_TYPE_LEFT_SHIFT, + EXPR_TYPE_RIGHT_SHIFT, + EXPR_TYPE_LOGICAL_NOT, + EXPR_TYPE_BIT_NOT, + EXPR_TYPE_UNARY_MINUS, + + /* Machine dependent operators. */ + EXPR_TYPE_MACHINE_DEPENDENT_0, + EXPR_TYPE_MACHINE_DEPENDENT_1, + EXPR_TYPE_MACHINE_DEPENDENT_2, + EXPR_TYPE_MACHINE_DEPENDENT_3, + EXPR_TYPE_MACHINE_DEPENDENT_4, + EXPR_TYPE_MACHINE_DEPENDENT_5, + EXPR_TYPE_MACHINE_DEPENDENT_6, + EXPR_TYPE_MACHINE_DEPENDENT_7, + EXPR_TYPE_MACHINE_DEPENDENT_8, + EXPR_TYPE_MACHINE_DEPENDENT_9, + EXPR_TYPE_MACHINE_DEPENDENT_10, + EXPR_TYPE_MACHINE_DEPENDENT_11, + EXPR_TYPE_MACHINE_DEPENDENT_12, + EXPR_TYPE_MACHINE_DEPENDENT_13, + + /* How many expression types exist. 
/**
 * One parsed expression node, as produced by read_into().
 */
struct expr {

    /* What the node is: a constant, a symbol, a register, or the operator joining its operands. */
    enum expr_type type;

    /* The symbol of an EXPR_TYPE_SYMBOL node, or the (left) operand of an operator node. */
    struct symbol *add_symbol;
    /* The right operand of a binary operator node; set to 0 by the parser for plain operands and unary operators. */
    struct symbol *op_symbol;

    /* The value of a constant, or the addend applied to a symbol.  Stored unsigned; the evaluator casts to signed long where the sign matters. */
    unsigned long add_number;

};
/**
 * Computes how many padding bytes are needed to round address up to a
 * multiple of 2^alignment.
 *
 * @param address    Address to be aligned.
 * @param alignment  Alignment expressed as a power-of-two exponent.
 *
 * @return The distance from address to the next aligned address (0 when it
 *         is already aligned).  An exponent that is not smaller than the bit
 *         width of unsigned long cannot be honoured and yields 0.
 */
static unsigned long relax_align (unsigned long address, unsigned long alignment) {

    unsigned long mask, new_address;

    /* Shifting by the full width of the type (or more) is undefined, and such an alignment is not representable anyway. */
    if (alignment >= sizeof (unsigned long) * 8) {
        return 0;
    }

    /* The mask is built in the same type as the address, so exponents of 32 and above work where unsigned long is 64 bits wide. */
    mask = ~(~((unsigned long) 0) << alignment);
    new_address = (address + mask) & ~mask;

    return new_address - address;

}
+ + section_set (section); + + root_frag = current_frag_chain->first_frag; + address = 0; + + for (frag_count = 0, frag = root_frag; frag; frag_count++, frag = frag->next) { + + frag->relax_marker = 0; + frag->address = address; + + address += frag->fixed_size; + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + break; + + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + + alignment_needed = relax_align (address, frag->offset); + + if (frag->relax_subtype != 0 && alignment_needed > frag->relax_subtype) { + alignment_needed = 0; + } + + address += alignment_needed; + break; + + case RELAX_TYPE_ORG: + case RELAX_TYPE_SPACE: + + break; + + case RELAX_TYPE_MACHINE_DEPENDENT: + + address += machine_dependent_estimate_size_before_relax (frag, section); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "%s: %lu: invalid relax type", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + } + + /** + * Prevents an infinite loop caused by frag growing because of a symbol that moves when the frag grows. + * + * Example: + * + * .org _abc + 2 + * _abc: + */ + max_iterations = frag_count * frag_count; + + /* Too many frags might cause an overflow. 
*/ + if (max_iterations < frag_count) { + max_iterations = frag_count; + } + + do { + + long change = 0; + changed = 0; + + for (frag = root_frag; frag; frag = frag->next) { + + long growth = 0; + unsigned long old_address; + + unsigned long old_offset; + unsigned long new_offset; + + frag->relax_marker = !frag->relax_marker; + + old_address = frag->address; + frag->address += change; + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + growth = 0; + break; + + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + + old_offset = relax_align (old_address + frag->fixed_size, frag->offset); + new_offset = relax_align (frag->address + frag->fixed_size, frag->offset); + + if (frag->relax_subtype != 0) { + + if (old_offset > frag->relax_subtype) { + old_offset = 0; + } + + if (new_offset > frag->relax_subtype) { + new_offset = 0; + } + + } + + growth = new_offset - old_offset; + break; + + case RELAX_TYPE_ORG: { + + struct symbol *symbol = state->end_symbol; + unsigned long target = frag->offset; + + if (frag->symbol) { + target += symbol_get_value (frag->symbol); + } + + if (symbol) { + + if (frag != symbol->frag) { + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + growth = 0; + + frag->next->address += target; + break; + + } + + } else if (state->format == AS_OUTPUT_BIN) { + + if (frag->offset == root_frag->offset) { + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + growth = 0; + + frag->next->address += target; + break; + + } + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + report_at (frag->filename, frag->line_number, REPORT_ERROR, "program origin redefined"); + + } + + growth = target - (frag->next->address + change); + + if (frag->address + frag->fixed_size > target) { + + report_at (frag->filename, frag->line_number, REPORT_ERROR, "attempt to move .org backwards"); + growth = 0; + + /* Changes the frag so no more errors appear because of it. 
*/ + frag->relax_type = RELAX_TYPE_ALIGN; + frag->offset = 0; + frag->fixed_size = frag->next->address + change - frag->address; + + } + + break; + + } + + case RELAX_TYPE_SPACE: + + growth = 0; + + if (frag->symbol) { + + long amount = symbol_get_value (frag->symbol); + + if (symbol_get_section (frag->symbol) != absolute_section || symbol_is_undefined (frag->symbol)) { + + report_at (frag->filename, frag->line_number, REPORT_WARNING, ".space specifies non-absolute value"); + + /* Prevents the error from repeating. */ + frag->symbol = 0; + + } else if (amount < 0) { + + report_at (frag->filename, frag->line_number, REPORT_WARNING, ".space with negative value, ignoring"); + frag->symbol = 0; + + } else { + growth = old_address + frag->fixed_size + amount - frag->next->address; + } + + } + + break; + + case RELAX_TYPE_MACHINE_DEPENDENT: + + growth = machine_dependent_relax_frag (frag, section, change); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "%s: %lu: invalid relax type", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + if (growth) { + + change += growth; + changed = 1; + + } + + } + + } while (changed && --max_iterations); + + if (changed) { + + report_at (0, 0, REPORT_FATAL_ERROR, "Infinite loop encountered whilst attempting to compute the addresses in section %s", section_get_name (section)); + exit (EXIT_FAILURE); + + } + +} + +static void finish_frags_after_relaxation (struct section *section) { + + struct frag *root_frag, *frag; + + section_set (section); + root_frag = current_frag_chain->first_frag; + + for (frag = root_frag; frag; frag = frag->next) { + + switch (frag->relax_type) { + + case RELAX_TYPE_NONE_NEEDED: + + break; + + case RELAX_TYPE_ORG: + case RELAX_TYPE_ALIGN: + case RELAX_TYPE_ALIGN_CODE: + case RELAX_TYPE_SPACE: { + + signed long i; + + unsigned char *p; + unsigned char fill; + + frag->offset = frag->next->address - (frag->address + frag->fixed_size); + + if (((long) (frag->offset)) 
< 0) { + /* frag->offset is a signed long, so the negative distance is reported with %ld. */ + report_at (frag->filename, frag->line_number, REPORT_ERROR, "attempt to .org/.space backward (%ld)", frag->offset); + frag->offset = 0; + + } + + p = finished_frag_increase_fixed_size_by_frag_offset (frag); + fill = *p; + + for (i = 0; i < frag->offset; i++) { + p[i] = fill; + } + + break; + + } + + case RELAX_TYPE_MACHINE_DEPENDENT: + + machine_dependent_finish_frag (frag); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid relax type"); + exit (EXIT_FAILURE); + + } + + } + +} + +static void adjust_reloc_symbols_of_section (struct section *section) { + /* Rewrites every pending fixup of the section against its symbol's section symbol, moving the symbol value into add_number. */ + struct fixup *fixup; + section_set (section); + + for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + if (fixup->done) { + continue; + } + + if (fixup->add_symbol) { + + struct symbol *symbol = fixup->add_symbol; + struct section *symbol_section; + + /* Resolves symbols that have not been resolved yet (expression symbols). */ + symbol_resolve_value (symbol); + + if (fixup->sub_symbol) { + symbol_resolve_value (fixup->sub_symbol); + } + + if (symbol_uses_reloc_symbol (symbol)) { + + fixup->add_number += symbol_get_value_expression (symbol)->add_number; + + symbol = symbol_get_value_expression (symbol)->add_symbol; + fixup->add_symbol = symbol; + + } + + if (symbol_force_reloc (symbol)) { + continue; + } + + symbol_section = symbol_get_section (symbol); + + if (symbol_section == absolute_section) { + continue; + } + + fixup->add_number += symbol_get_value (symbol); + fixup->add_symbol = section_symbol (symbol_get_section (symbol)); + + } + + } + +} + +signed long machine_dependent_pcrel_from (struct fixup *fixup); + +int machine_dependent_force_relocation_local (struct fixup *fixup); +void machine_dependent_apply_fixup (struct fixup *fixup, unsigned long value); + +static unsigned long fixup_section (struct section *section) { + + unsigned long section_reloc_count = 0; + unsigned long add_number; + + struct section *add_symbol_section =
absolute_section; + struct fixup *fixup; + + section_set (section); + + for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) { + + add_number = fixup->add_number; + + if (fixup->add_symbol) { + add_symbol_section = symbol_get_section (fixup->add_symbol); + } + + if (fixup->sub_symbol) { + + struct section *sub_symbol_section; + + symbol_resolve_value (fixup->sub_symbol); + sub_symbol_section = symbol_get_section (fixup->sub_symbol); + + if (fixup->add_symbol && add_symbol_section == sub_symbol_section && !symbol_force_reloc (fixup->add_symbol) && !symbol_force_reloc (fixup->sub_symbol)) { + /* Same-section difference: fold it into a constant only when neither operand forces a relocation. */ + add_number += symbol_get_value (fixup->add_symbol); + add_number -= symbol_get_value (fixup->sub_symbol); + + fixup->add_number = add_number; + fixup->add_symbol = 0; + fixup->sub_symbol = 0; + + } else if (sub_symbol_section == section) { + /* Subtrahend lives in the section being fixed up: the fixup is turned into a pc-relative one. */ + add_number -= symbol_get_value (fixup->sub_symbol); + + if (!fixup->pcrel) { + add_number += machine_dependent_pcrel_from (fixup); + } + + fixup->sub_symbol = 0; + fixup->pcrel = 1; + + } else { + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "+++fixup_section sub_symbol"); + exit (EXIT_FAILURE); + + } + + } + + if (fixup->add_symbol) { + + if ((add_symbol_section == section) && !machine_dependent_force_relocation_local (fixup)) { + + add_number += symbol_get_value (fixup->add_symbol); + fixup->add_number = add_number; + + if (fixup->pcrel) { + + add_number -= machine_dependent_pcrel_from (fixup); + fixup->pcrel = 0; + + } + + fixup->add_symbol = 0; + + } else if (add_symbol_section == absolute_section || (fixup->reloc_type == RELOC_TYPE_FAR_CALL && !symbol_is_undefined (fixup->add_symbol))) { + + add_number += symbol_get_value (fixup->add_symbol); + + fixup->add_number = add_number; + fixup->add_symbol = 0; + + } + + } + + if (fixup->pcrel) { + + add_number -= machine_dependent_pcrel_from (fixup); + + if (!fixup->add_symbol && !fixup->done) { + fixup->add_symbol = section_symbol (absolute_section); + } + +
} + + machine_dependent_apply_fixup (fixup, add_number); + + if (!fixup->done) { + section_reloc_count++; + } + + if (fixup->size < sizeof (unsigned long)) { + + unsigned long mask = -1; + mask <<= fixup->size * 8 - !!fixup->fixup_signed; + + if ((add_number & mask) && (fixup->fixup_signed ? ((add_number & mask) != mask) : (-add_number & mask))) { + + report_at (0, 0, REPORT_ERROR, (add_number > 1000) + ? "value of %lu too large for field of %u byte%s at %#lx" : "value of %lu too large for field of %u byte%s at %#lx", + add_number, fixup->size, ((fixup->size == 1) ? "" : "s"), fixup->frag->address + fixup->where); + + } + + } + + } + + return section_reloc_count; + +} + +void fixup_code (void) { + + struct section *section; + struct symbol *symbol; + + for (section = sections; section; section = section_get_next_section (section)) { + relax_section (section); + } + + for (section = sections; section; section = section_get_next_section (section)) { + finish_frags_after_relaxation (section); + } + + { + + unsigned long address, text_section_size; + struct frag *frag; + + section_set (text_section); + + text_section_size = current_frag_chain->last_frag->address + current_frag_chain->last_frag->fixed_size; + address = text_section_size; + + section_set (data_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + frag->address = address; + address += frag->fixed_size; + + } + + section_set (bss_section); + + for (frag = current_frag_chain->first_frag; frag; frag = frag->next) { + + frag->address = address; + address += frag->fixed_size; + + } + + } + + finalize_symbols = 1; + + for (symbol = symbols; symbol; symbol = symbol->next) { + symbol_resolve_value (symbol); + } + + for (section = sections; section; section = section_get_next_section (section)) { + adjust_reloc_symbols_of_section (section); + } + + for (section = sections; section; section = section_get_next_section (section)) { + fixup_section (section); + } + +} diff --git 
a/fixup.h b/fixup.h new file mode 100644 index 0000000..833117f --- /dev/null +++ b/fixup.h @@ -0,0 +1,31 @@ +/****************************************************************************** + * @file fixup.h + *****************************************************************************/ +#ifndef _FIXUP_H +#define _FIXUP_H + +#include "expr.h" + +#define RELOC_TYPE_DEFAULT 0 +#define RELOC_TYPE_FAR_CALL 1 + +struct fixup { + + struct frag *frag; + int done, fixup_signed; + + unsigned long where; + unsigned int size; + + struct symbol *add_symbol, *sub_symbol; + long add_number; + + int pcrel, reloc_type; + struct fixup *next; + +}; + +struct fixup *fixup_new (struct frag *frag, unsigned long where, int size, struct symbol *add_symbol, long add_number, int pcrel, int reloc_type); +struct fixup *fixup_new_expr (struct frag *frag, unsigned long where, int size, struct expr *expr, int pcrel, int reloc_type); + +#endif /* _FIXUP_H */ diff --git a/frag.c b/frag.c new file mode 100644 index 0000000..a0e7581 --- /dev/null +++ b/frag.c @@ -0,0 +1,197 @@ +/****************************************************************************** + * @file frag.c + *****************************************************************************/ +#include "frag.h" +#include "lib.h" +#include "section.h" + +struct frag zero_address_frag = { 0 }; +struct frag *current_frag = 0; + +struct frag *frag_alloc (void) { + return xmalloc (sizeof (struct frag)); +} + +int frags_is_greater_than_offset (unsigned long offset2, struct frag *frag2, unsigned long offset1, struct frag *frag1, signed long *offset_p) { + + signed long difference; + struct frag *frag; + + /* Checks for something that should be impossible. 
*/ + if (frag2 == frag1 || offset1 > frag1->fixed_size) { + return 0; + } + + difference = offset2 - offset1; + + for (frag = frag1;;) { + + difference += frag->fixed_size; + frag = frag->next; + + if (frag == frag2) { + + if (difference == 0) { + return 0; + } + + break; + + } + + if (!frag) { + return 0; + } + + } + + *offset_p = offset2 - offset1 - difference; + return 1; + +} + +int frags_offset_is_fixed (struct frag *frag1, struct frag *frag2, signed long *offset_p) { + + signed long offset = frag1->address - frag2->address; + struct frag *frag; + + if (frag1 == frag2) { + + *offset_p = offset; + return 1; + + } + + /* Checks if frag2 is after frag1. */ + frag = frag1; + + while (frag->relax_type == RELAX_TYPE_NONE_NEEDED) { + + offset += frag->fixed_size; + + if (!(frag = frag->next)) { + break; + } + + if (frag == frag2) { + + *offset_p = offset; + return 1; + + } + + } + + /* Checks if frag1 is after frag2. */ + offset = frag1->address - frag2->address; + frag = frag2; + + while (frag->relax_type == RELAX_TYPE_NONE_NEEDED) { + + offset -= frag->fixed_size; + + if (!(frag = frag->next)) { + break; + } + + if (frag == frag1) { + + *offset_p = offset; + return 1; + + } + + } + + return 0; + +} + +unsigned char *finished_frag_increase_fixed_size_by_frag_offset (struct frag *frag) { + + frag->fixed_size += frag->offset; + + if (frag->fixed_size > frag->size) { + + frag->buf = xrealloc (frag->buf, frag->fixed_size); + frag->size = frag->fixed_size; + + } + + return (frag->buf + frag->fixed_size - frag->offset); + +} + +unsigned char *frag_alloc_space (unsigned long space) { + + if (current_frag->fixed_size + space >= current_frag->size) { + + current_frag->size += ((space > FRAG_BUF_REALLOC_STEP) ? 
space : FRAG_BUF_REALLOC_STEP); + current_frag->buf = xrealloc (current_frag->buf, current_frag->size); + + } + + return current_frag->buf + current_frag->fixed_size; + +} + +unsigned char *frag_increase_fixed_size (unsigned long increase) { + + frag_alloc_space (increase); + + current_frag->fixed_size += increase; + return (current_frag->buf + current_frag->fixed_size - increase); + +} + +void frag_align (signed long alignment, int fill_char, signed long max_bytes_to_skip) { + + (frag_alloc_space (1 << alignment))[0] = fill_char; + frag_set_as_variant (RELAX_TYPE_ALIGN, max_bytes_to_skip, 0, alignment, 0); + +} + +void frag_align_code (signed long alignment, signed long max_bytes_to_skip) { + + (frag_alloc_space (1 << alignment))[0] = 0x90; + frag_set_as_variant (RELAX_TYPE_ALIGN_CODE, max_bytes_to_skip, 0, alignment, 0); + +} + +void frag_append_1_char (unsigned char ch) { + + if (current_frag->fixed_size == current_frag->size) { + + current_frag->size += FRAG_BUF_REALLOC_STEP; + current_frag->buf = xrealloc (current_frag->buf, current_frag->size); + + } + + current_frag->buf[current_frag->fixed_size++] = ch; + +} + +void frag_new (void) { + + struct frag *prev_frag = current_frag; + + current_frag = frag_alloc (); + current_frag->relax_type = RELAX_TYPE_NONE_NEEDED; + + prev_frag->next = current_frag; + current_frag_chain->last_frag = current_frag; + +} + +void frag_set_as_variant (int relax_type, int relax_subtype, struct symbol *symbol, signed long offset, unsigned long opcode_offset_in_buf) { + + current_frag->relax_type = relax_type; + current_frag->relax_subtype = relax_subtype; + current_frag->symbol = symbol; + current_frag->offset = offset; + current_frag->opcode_offset_in_buf = opcode_offset_in_buf; + + get_filename_and_line_number (&(current_frag->filename), &(current_frag->line_number)); + frag_new (); + +} diff --git a/frag.h b/frag.h new file mode 100644 index 0000000..737b8a2 --- /dev/null +++ b/frag.h @@ -0,0 +1,54 @@ 
+/****************************************************************************** + * @file frag.h + *****************************************************************************/ +#ifndef _FRAG_H +#define _FRAG_H + +#define RELAX_TYPE_NONE_NEEDED 0 +#define RELAX_TYPE_ALIGN 1 +#define RELAX_TYPE_ALIGN_CODE 2 +#define RELAX_TYPE_ORG 3 +#define RELAX_TYPE_SPACE 4 +#define RELAX_TYPE_MACHINE_DEPENDENT 5 + +struct frag { + + unsigned long fixed_size, address, size; + unsigned int relax_type, relax_subtype; + + struct symbol *symbol; + unsigned char *buf; + + signed long offset; + unsigned long opcode_offset_in_buf; + + const char *filename; + unsigned long line_number; + + int relax_marker; + struct frag *next; + +}; + +extern struct frag zero_address_frag; +extern struct frag *current_frag; + +#define FRAG_BUF_REALLOC_STEP 16 + +struct frag *frag_alloc (void); + +int frags_is_greater_than_offset (unsigned long offset2, struct frag *frag2, unsigned long offset1, struct frag *frag1, signed long *offset_p); +int frags_offset_is_fixed (struct frag *frag1, struct frag *frag2, signed long *offset_p); + +unsigned char *finished_frag_increase_fixed_size_by_frag_offset (struct frag *frag); +unsigned char *frag_alloc_space (unsigned long space); +unsigned char *frag_increase_fixed_size (unsigned long increase); + +void frag_align (signed long alignment, int fill_char, signed long max_bytes_to_skip); +void frag_align_code (signed long alignment, signed long max_bytes_to_skip); + +void frag_append_1_char (unsigned char ch); +void frag_new (void); +void frag_set_as_variant (int relax_type, int relax_subtype, struct symbol *symbol, signed long offset, unsigned long opcode_offset_in_buf); + +#endif /* _FRAG_H */ diff --git a/hashtab.c b/hashtab.c new file mode 100644 index 0000000..b6863a5 --- /dev/null +++ b/hashtab.c @@ -0,0 +1,215 @@ +/****************************************************************************** + * @file hashtab.c + 
*****************************************************************************/ +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "hashtab.h" + +static struct hashtab_entry *find_entry (struct hashtab_entry *entries, unsigned int capacity, struct hashtab_name *key); + +static int adjust_capacity (struct hashtab *table, unsigned int new_capacity) { + /* Rehashes every live entry into a fresh array of new_capacity slots; returns -2 if the allocation fails, 0 otherwise. */ + struct hashtab_entry *new_entries, *old_entries; + unsigned int i, new_count, old_capacity; + + if ((new_entries = malloc (sizeof (*new_entries) * new_capacity)) == NULL) { + return -2; + } + + for (i = 0; i < new_capacity; i++) { + + struct hashtab_entry *entry = &new_entries[i]; + + entry->key = NULL; + entry->value = NULL; + + } + + old_entries = table->entries; + old_capacity = table->capacity; + + new_count = 0; + + for (i = 0; i < old_capacity; i++) { + + struct hashtab_entry *entry = &old_entries[i], *dest; + + if (entry->key == NULL) { + continue; + } + + dest = find_entry (new_entries, new_capacity, entry->key); + + dest->key = entry->key; + dest->value = entry->value; + + new_count++; + + } + + free (old_entries); + + table->capacity = new_capacity; + table->count = new_count; + table->entries = new_entries; + table->used = new_count; + + return 0; + +} + +static struct hashtab_entry *find_entry (struct hashtab_entry *entries, unsigned int capacity, struct hashtab_name *key) { + /* Linear probe from key->hash % capacity: returns the slot holding key, else the first tombstone seen, else the first empty slot. capacity must be non-zero. */ + struct hashtab_entry *tombstone = NULL; + unsigned int index; + + for (index = key->hash % capacity; ; index = (index + 1) % capacity) { + + struct hashtab_entry *entry = &entries[index]; + + if (entry->key == NULL) { + + if (entry->value == NULL) { + + if (tombstone == NULL) { + return entry; + } + + return tombstone; + + } else if (tombstone == NULL) { + tombstone = entry; + } + + } else if (entry->key->bytes == key->bytes) { + + if (memcmp (entry->key->chars, key->chars, key->bytes) == 0 && entry->key->hash == key->hash) { + return entry; + } + + } + + } + +} + +static unsigned int hash_string (const void *p, unsigned int length) { + +
unsigned char *str = (unsigned char *) p; + unsigned int i, result = 0; + + for (i = 0; i < length; i++) { + result = (((unsigned int) str[i]) << 12) + (result >> 6) + result + (result >> 3) + (((unsigned int) str[i]) << 8) - result; + } + + return result; + +} + +struct hashtab_name *hashtab_alloc_name (const char *str) { + + struct hashtab_name *name; + unsigned int bytes = strlen (str), hash = hash_string (str, bytes); + + if ((name = malloc (sizeof (*name))) == NULL) { + return NULL; + } + + name->bytes = bytes; + name->chars = str; + name->hash = hash; + + return name; + +} + +struct hashtab_name *hashtab_get_key (struct hashtab *table, const char *name) { + + struct hashtab_name *key; + struct hashtab_entry *entry; + + if (table == NULL || table->count == 0 || !(key = hashtab_alloc_name (name))) { + return 0; + } + + entry = find_entry (table->entries, table->capacity, key); + free (key); + + return entry->key; + +} + +void *hashtab_get (struct hashtab *table, struct hashtab_name *key) { + + struct hashtab_entry *entry; + + if (table == NULL || table->count == 0) { + return NULL; + } + + entry = find_entry (table->entries, table->capacity, key); + + if (entry->key == NULL) { + return NULL; + } + + return entry->value; + +} + +int hashtab_put (struct hashtab *table, struct hashtab_name *key, void *value) { + + const int MIN_CAPACITY = 15; + + struct hashtab_entry *entry; + int ret = 0; + + if (table->used >= table->capacity / 2) { + + int capacity = table->capacity * 2 - 1; + + if (capacity < MIN_CAPACITY) { + capacity = MIN_CAPACITY; + } + + if ((ret = adjust_capacity (table, capacity))) { + return ret; + } + + } + + entry = find_entry (table->entries, table->capacity, key); + + if (entry->key == NULL) { + + table->count++; + + if (entry->value == NULL) { + table->used++; + } + + } + + entry->key = key; + entry->value = value; + + return 0; + +} + +void hashtab_remove (struct hashtab *table, struct hashtab_name *key) { + + struct hashtab_entry *entry; + + if 
((entry = find_entry (table->entries, table->capacity, key)) != NULL) { + + entry->key = NULL; + entry->value = NULL; + + --table->count; + + } + +} diff --git a/hashtab.h b/hashtab.h new file mode 100644 index 0000000..47aab3b --- /dev/null +++ b/hashtab.h @@ -0,0 +1,36 @@ +/****************************************************************************** + * @file hashtab.h + *****************************************************************************/ +#ifndef _HASHTAB_H +#define _HASHTAB_H + +struct hashtab_name { + + const char *chars; + int bytes, hash; + +}; + +struct hashtab_entry { + + struct hashtab_name *key; + void *value; + +}; + +struct hashtab { + + struct hashtab_entry *entries; + int capacity, count, used; + +}; + +struct hashtab_name *hashtab_alloc_name (const char *str); +struct hashtab_name *hashtab_get_key (struct hashtab *table, const char *name); + +void *hashtab_get (struct hashtab *table, struct hashtab_name *key); + +int hashtab_put (struct hashtab *table, struct hashtab_name *key, void *value); +void hashtab_remove (struct hashtab *table, struct hashtab_name *key); + +#endif /* _HASHTAB_H */ diff --git a/intel.c b/intel.c new file mode 100644 index 0000000..c7d153e --- /dev/null +++ b/intel.c @@ -0,0 +1,4515 @@ +/****************************************************************************** + * @file intel.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "as.h" +#include "expr.h" +#include "fixup.h" +#include "frag.h" +#include "hashtab.h" +#include "intel.h" +#include "kwd.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +struct templates { + + const char *name; + struct template *start, *end; + +}; + +#define RELAX_SUBTYPE_SHORT_JUMP 0x00 +#define RELAX_SUBTYPE_CODE16_JUMP 0x01 +#define RELAX_SUBTYPE_LONG_JUMP 0x02 + +#define RELAX_SUBTYPE_SHORT16_JUMP (RELAX_SUBTYPE_SHORT_JUMP | 
RELAX_SUBTYPE_CODE16_JUMP) +#define RELAX_SUBTYPE_LONG16_JUMP (RELAX_SUBTYPE_LONG_JUMP | RELAX_SUBTYPE_CODE16_JUMP) + +#define RELAX_SUBTYPE_UNCONDITIONAL_JUMP 0x00 +#define RELAX_SUBTYPE_CONDITIONAL_JUMP 0x01 +#define RELAX_SUBTYPE_CONDITIONAL_JUMP86 0x02 +#define RELAX_SUBTYPE_FORCED_SHORT_JUMP 0x03 + +#define ENCODE_RELAX_SUBTYPE(type, size) (((type) << 2) | (size)) +#define TYPE_FROM_RELAX_SUBTYPE(subtype) ((subtype) >> 2) + +#define DISPLACEMENT_SIZE_FROM_RELAX_SUBSTATE(s) \ + (((s) & 3) == RELAX_SUBTYPE_LONG_JUMP ? 4 : (((s) & 3) == RELAX_SUBTYPE_LONG16_JUMP ? 2 : 1)) + +struct relax_table_entry { + + long forward_reach; + long backward_reach; + long size_of_variable_part; + + unsigned int next_subtype; + +}; + +struct relax_table_entry relax_table[] = { + + /* Unconditional jumps. */ + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP) }, + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP) }, + { 0, 0, 4, 0 }, + { 0, 0, 2, 0 }, + + /* Conditional jumps. */ + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP) }, + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP) }, + { 0, 0, 5, 0 }, + { 0, 0, 3, 0 }, + + /* Conditional jumps 86. */ + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG_JUMP) }, + { 127 + 1, -128 + 1, 1, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG16_JUMP) }, + { 0, 0, 5, 0 }, + { 0, 0, 4, 0 }, + + /* Forced short jump that cannot be relaxed. 
*/ + { 127 + 1, -128 + 1, 1, 0 }, + +}; + +#define TWOBYTE_OPCODE 0x0F + +static struct hashtab hashtab_templates = { 0 }; +static struct hashtab hashtab_regs = { 0 }; + +#define DEFAULT_CPU_ARCH_FLAGS (~0LU) +#define DEFAULT_CPU_ARCH_NAME "ALL" + +static unsigned long cpu_arch_flags = DEFAULT_CPU_ARCH_FLAGS; + +static char *cpu_arch_name = 0; +static char *cpu_extensions_name = 0; + +struct cpu_arch_entry { + + const char *name; + unsigned long cpu_flags; + +}; + +#define CPU_I8086_FLAGS (CPU_8086) +#define CPU_I186_FLAGS (CPU_I8086_FLAGS | CPU_186) +#define CPU_I286_FLAGS (CPU_I186_FLAGS | CPU_286) +#define CPU_I386_FLAGS (CPU_I286_FLAGS | CPU_386) + +/* i486 is the first CPU with a FPU integrated. */ +#define CPU_I486_FLAGS (CPU_I386_FLAGS | CPU_486 | CPU_387) +#define CPU_I686_FLAGS (CPU_I486_FLAGS | CPU_686 | CPU_687 | CPU_CMOV) + +static const struct cpu_arch_entry cpu_archs[] = { + + { "i8086", CPU_I8086_FLAGS }, + { "i186", CPU_I186_FLAGS }, + { "i286", CPU_I286_FLAGS }, + { "i386", CPU_I386_FLAGS }, + { "i486", CPU_I486_FLAGS }, + { "i686", CPU_I686_FLAGS } + +}; + +static const struct cpu_arch_entry cpu_extensions[] = { + + { "8087", CPU_8087 }, + { "287", CPU_287 }, + { "387", CPU_387 }, + { "687", CPU_387 | CPU_687 }, + { "cmov", CPU_CMOV } + +}; + +static const struct cpu_arch_entry cpu_no_extensions[] = { + + { "no87", CPU_8087 | CPU_287 | CPU_387 | CPU_687 }, + { "no8087", CPU_8087 }, + { "no287", CPU_287 }, + { "no387", CPU_387 }, + { "no687", CPU_687 }, + { "nocmov", CPU_CMOV } + +}; + +static struct reg_entry *reg_esp = 0; +static struct reg_entry *reg_ss = 0; +static struct reg_entry *reg_ds = 0; + +#define NO_SUF (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF) +#define B_SUF (NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF) +#define W_SUF (NO_BSUF | NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF) +#define L_SUF (NO_BSUF | NO_WSUF | NO_SSUF | NO_QSUF | NO_INTELSUF) +#define Q_SUF (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | 
NO_INTELSUF) +#define INTEL_SUF (NO_BSUF | NO_WSUF | NO_SSUF | NO_LSUF | NO_QSUF) + +#define BW_SUF (NO_SSUF | NO_LSUF | NO_QSUF | NO_INTELSUF) +#define WL_SUF (NO_BSUF | NO_SSUF | NO_QSUF | NO_INTELSUF) +#define BWL_SUF (NO_SSUF | NO_QSUF | NO_INTELSUF) +#define SL_SUF (NO_BSUF | NO_WSUF | NO_QSUF | NO_INTELSUF) + +/*static int allow_no_prefix_reg = 0;*/ +static int intel_syntax = 1, bits = 16; + +/** Table for lexical analysis. */ +static char register_chars_table[256] = { 0 }; + +static struct template template_table[] = { + + /* Move instructions. */ + { "mov", 2, 0xA0, NONE, BWL_SUF | D | W, { DISP16 | DISP32, ACC, 0 }, CPU_8086 }, + { "mov", 2, 0x88, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0xB0, NONE, BWL_SUF | W | SHORT_FORM, { ENCODABLEIMM, REG8 | REG16 | REG32, 0 }, CPU_8086 }, + { "mov", 2, 0xC6, NONE, BWL_SUF | D | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + /* Move instructions for segment registers. */ + { "mov", 2, 0x8C, NONE, WL_SUF | MODRM, { SEGMENT1, WORD_REG | INV_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0x8C, NONE, W_SUF | MODRM | IGNORE_SIZE, { SEGMENT1, ANY_MEM, 0 }, CPU_8086 }, + { "mov", 2, 0x8C, NONE, WL_SUF | MODRM, { SEGMENT2, WORD_REG | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x8C, NONE, W_SUF | MODRM | IGNORE_SIZE, { SEGMENT2, ANY_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x8E, NONE, WL_SUF | MODRM | IGNORE_SIZE, { WORD_REG | INV_MEM, SEGMENT1, 0 }, CPU_8086 }, + { "mov", 2, 0x8E, NONE, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, SEGMENT1, 0 }, CPU_8086 }, + { "mov", 2, 0x8E, NONE, WL_SUF | MODRM | IGNORE_SIZE, { WORD_REG | INV_MEM, SEGMENT2, 0 }, CPU_386 }, + { "mov", 2, 0x8E, NONE, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, SEGMENT2, 0 }, CPU_386 }, + + /* Move instructions for control, debug and test registers. 
*/ + { "mov", 2, 0x0F20, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { CONTROL, REG32 | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x0F21, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { DEBUG, REG32 | INV_MEM, 0 }, CPU_386 }, + { "mov", 2, 0x0F24, NONE, L_SUF | D | MODRM | IGNORE_SIZE, { TEST, REG32 | INV_MEM, 0 }, CPU_386 }, + + /* Move with sign extend. */ + /* "movsbl" and "movsbw" are not unified into "movsb" to prevent conflict with "movs". */ + { "movsbl", 2, 0x0FBE, NONE, NO_SUF | MODRM, { REG8 | ANY_MEM, REG32, 0 }, CPU_386 }, + { "movsbw", 2, 0x0FBE, NONE, NO_SUF | MODRM, { REG8 | ANY_MEM, REG16, 0 }, CPU_386 }, + { "movswl", 2, 0x0FBF, NONE, NO_SUF | MODRM, { REG16 | ANY_MEM, REG32, 0 }, CPU_386 }, + + /* Alternative syntax. */ + { "movsx", 2, 0x0FBE, NONE, BW_SUF | W | MODRM, { REG8 | REG16 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Move with zero extend. */ + { "movzb", 2, 0x0FB6, NONE, WL_SUF | MODRM, { REG8 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "movzwl", 2, 0x0FB7, NONE, NO_SUF | MODRM, { REG16 | ANY_MEM, REG32, 0 }, CPU_386 }, + + /* Alternative syntax. */ + { "movzx", 2, 0x0FB6, NONE, BW_SUF | W | MODRM, { REG8 | REG16 | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Push instructions. */ + { "push", 1, 0x50, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "push", 1, 0xFF, 6, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "push", 1, 0x6A, NONE, WL_SUF | DEFAULT_SIZE, { IMM8S, 0, 0 }, CPU_186 }, + { "push", 1, 0x68, NONE, WL_SUF | DEFAULT_SIZE, { IMM16 | IMM32, 0, 0 }, CPU_186 }, + + { "push", 1, 0x06, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT1, 0, 0 }, 0}, + { "push", 1, 0x0FA0, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT2, 0, 0 }, 3}, + + { "pusha", 0, 0x60, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Pop instructions. 
*/ + { "pop", 1, 0x58, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "pop", 1, 0x8F, NONE, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM, 0, 0 }, CPU_8086 }, + +#define POP_SEGMENT_SHORT 0x07 + + { "pop", 1, 0x07, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT1, 0, 0 }, CPU_8086 }, + { "pop", 1, 0x0FA1, NONE, WL_SUF | DEFAULT_SIZE | SEGSHORTFORM, { SEGMENT2, 0, 0 }, CPU_386 }, + + { "popa", 0, 0x61, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Exchange instructions. */ + { "xchg", 2, 0x90, NONE, WL_SUF | SHORT_FORM, { WORD_REG, ACC, 0 }, CPU_8086 }, + { "xchg", 2, 0x90, NONE, WL_SUF | SHORT_FORM, { ACC, WORD_REG, 0 }, CPU_8086 }, + { "xchg", 2, 0x86, NONE, BWL_SUF | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "xchg", 2, 0x86, NONE, BWL_SUF | W | MODRM, { REG | ANY_MEM, REG, 0 }, CPU_8086 }, + + /* In/out for ports. */ + { "in", 2, 0xE4, NONE, BWL_SUF | W, { IMM8, ACC, 0 }, CPU_8086 }, + { "in", 2, 0xEC, NONE, BWL_SUF | W, { PORT, ACC, 0 }, CPU_8086 }, + { "in", 1, 0xE4, NONE, BWL_SUF | W, { IMM8, 0, 0 }, CPU_8086 }, + { "in", 1, 0xEC, NONE, BWL_SUF | W, { PORT, 0, 0 }, CPU_8086 }, + + { "out", 2, 0xE6, NONE, BWL_SUF | W, { ACC, IMM8, 0 }, CPU_8086 }, + { "out", 2, 0xEE, NONE, BWL_SUF | W, { ACC, PORT, 0 }, CPU_8086 }, + { "out", 2, 0xE6, NONE, BWL_SUF | W, { IMM8, 0, 0 }, CPU_8086 }, + { "out", 2, 0xEE, NONE, BWL_SUF | W, { PORT, 0, 0 }, CPU_8086 }, + + /* Load effective address. */ + { "lea", 2, 0x8D, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + + /* Load far pointer from memory. 
*/ + { "lds", 2, 0xC5, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + { "les", 2, 0xC4, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_8086 }, + { "lfs", 2, 0x0FB4, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "lgs", 2, 0x0FB5, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "lss", 2, 0x0FB2, NONE, WL_SUF | MODRM, { ANY_MEM, WORD_REG, 0 }, CPU_386 }, + + /* Flags register instructions. */ + { "cmc", 0, 0xF5, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "clc", 0, 0xF8, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "stc", 0, 0xF9, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "cli", 0, 0xFA, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "sti", 0, 0xFB, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "cld", 0, 0xFC, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "std", 0, 0xFD, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "clts", 0, 0x0F06, NONE, NO_SUF, { 0, 0, 0 }, CPU_286 }, + { "lahf", 0, 0x9F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "sahf", 0, 0x9E, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "pushf", 0, 0x9C, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "popf", 0, 0x9D, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + + /* Arithmetic instructions. 
*/ + { "add", 2, 0x00, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "add", 2, 0x83, 0, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "add", 2, 0x04, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "add", 2, 0x80, 0, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "inc", 1, 0x40, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "inc", 1, 0xFE, 0, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sub", 2, 0x28, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sub", 2, 0x83, 5, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "sub", 2, 0x2C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "sub", 2, 0x80, 5, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "dec", 1, 0x48, NONE, WL_SUF | SHORT_FORM, { WORD_REG, 0, 0 }, CPU_8086 }, + { "dec", 1, 0xFE, 1, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sbb", 2, 0x18, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sbb", 2, 0x83, 3, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "sbb", 2, 0x1C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "sbb", 2, 0x80, 3, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "cmp", 2, 0x38, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "cmp", 2, 0x83, 7, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "cmp", 2, 0x3C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "cmp", 2, 0x80, 7, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "test", 2, 0x84, NONE, BWL_SUF | W | MODRM, { REG | ANY_MEM, REG, 0 }, CPU_8086 }, + { "test", 2, 0x84, NONE, BWL_SUF | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "test", 2, 0xA8, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "test", 2, 0xF6, 0, BWL_SUF | W 
| MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "and", 2, 0x20, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "and", 2, 0x83, 4, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "and", 2, 0x24, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "and", 2, 0x80, 4, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "or", 2, 0x08, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "or", 2, 0x83, 1, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "or", 2, 0x0C, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "or", 2, 0x80, 1, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "xor", 2, 0x30, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "xor", 2, 0x83, 6, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "xor", 2, 0x34, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "xor", 2, 0x80, 6, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "clr", 1, 0x30, NONE, BWL_SUF | W | MODRM | REG_DUPLICATION, { REG, 0, 0 }, CPU_8086 }, + + { "adc", 2, 0x10, NONE, BWL_SUF | D | W | MODRM, { REG, REG | ANY_MEM, 0 }, CPU_8086 }, + { "adc", 2, 0x83, 2, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, 0 }, CPU_8086 }, + { "adc", 2, 0x14, NONE, BWL_SUF | W, { ENCODABLEIMM, ACC, 0 }, CPU_8086 }, + { "adc", 2, 0x80, 2, BWL_SUF | W | MODRM, { ENCODABLEIMM, REG | ANY_MEM, 0 }, CPU_8086 }, + + { "neg", 1, 0xF6, 3, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "not", 1, 0xF6, 2, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "aaa", 0, 0x37, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "aas", 0, 0x3F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + { "daa", 0, 0x27, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "das", 0, 0x2F, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + { "aad", 0, 0xD50A, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + 
{ "aad", 1, 0xD5, NONE, NO_SUF, { IMM8, 0, 0 }, CPU_8086 }, + + { "aam", 0, 0xD40A, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "aam", 1, 0xD4, NONE, NO_SUF, { IMM8, 0, 0 }, CPU_8086 }, + + /* Conversion instructions. */ + { "cbw", 0, 0x98, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cwde", 0, 0x98, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_8086 }, + { "cwd", 0, 0x99, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cdq", 0, 0x99, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_386 }, + + /* Other naming. */ + { "cbtw", 0, 0x98, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cwtl", 0, 0x98, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_8086 }, + { "cwtd", 0, 0x99, NONE, NO_SUF | SIZE16, { 0, 0, 0 }, CPU_8086 }, + { "cltd", 0, 0x99, NONE, NO_SUF | SIZE32, { 0, 0, 0 }, CPU_386 }, + + { "mul", 1, 0xF6, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "imul", 1, 0xF6, 5, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "imul", 2, 0x0FAF, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "imul", 3, 0x6B, NONE, WL_SUF | MODRM, { IMM8S, WORD_REG | ANY_MEM, WORD_REG }, CPU_186 }, + { "imul", 3, 0x69, NONE, WL_SUF | MODRM, { IMM16 | IMM32, WORD_REG | ANY_MEM, WORD_REG }, CPU_186 }, + { "imul", 2, 0x6B, NONE, WL_SUF | MODRM | REG_DUPLICATION, { IMM8S, WORD_REG, 0 }, CPU_186 }, + { "imul", 2, 0x69, NONE, WL_SUF | MODRM | REG_DUPLICATION, { IMM16 | IMM32, WORD_REG, 0 }, CPU_186 }, + + { "div", 1, 0xF6, 6, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "div", 2, 0xF6, 6, BWL_SUF | W | MODRM, { REG | ANY_MEM, ACC, 0 }, CPU_8086 }, + + { "idiv", 1, 0xF6, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + { "idiv", 2, 0xF6, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, ACC, 0 }, CPU_8086 }, + + { "rol", 2, 0xC0, 0, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rol", 2, 0xD2, 0, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rol", 1, 0xD0, 0, BWL_SUF | W | 
MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "ror", 2, 0xC0, 1, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "ror", 2, 0xD2, 1, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "ror", 1, 0xD0, 1, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "rcl", 2, 0xC0, 2, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rcl", 2, 0xD2, 2, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rcl", 1, 0xD0, 2, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "rcr", 2, 0xC0, 3, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "rcr", 2, 0xD2, 3, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "rcr", 1, 0xD0, 3, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sal", 2, 0xC0, 4, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "sal", 2, 0xD2, 4, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sal", 1, 0xD0, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shl", 2, 0xC0, 4, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "shl", 2, 0xD2, 4, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "shl", 1, 0xD0, 4, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shr", 2, 0xC0, 5, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "shr", 2, 0xD2, 5, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "shr", 1, 0xD0, 5, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "sar", 2, 0xC0, 7, BWL_SUF | W | MODRM, { IMM8, REG | ANY_MEM, 0 }, CPU_186 }, + { "sar", 2, 0xD2, 7, BWL_SUF | W | MODRM, { SHIFT_COUNT, REG | ANY_MEM, 0 }, CPU_8086 }, + { "sar", 1, 0xD0, 7, BWL_SUF | W | MODRM, { REG | ANY_MEM, 0, 0 }, CPU_8086 }, + + { "shld", 3, 0x0FA4, NONE, WL_SUF | MODRM, { IMM8, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shld", 3, 0x0FA5, NONE, WL_SUF | MODRM, { 
SHIFT_COUNT, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shld", 2, 0x0FA5, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + { "shrd", 3, 0x0FAC, NONE, WL_SUF | MODRM, { IMM8, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shrd", 3, 0x0FAD, NONE, WL_SUF | MODRM, { SHIFT_COUNT, WORD_REG, WORD_REG | ANY_MEM }, CPU_386 }, + { "shrd", 2, 0x0FAD, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + /* Program control transfer instructions. */ + { "call", 1, 0xE8, NONE, WL_SUF | DEFAULT_SIZE | CALL, { DISP16 | DISP32, 0, 0 }, CPU_8086 }, + { "call", 1, 0xFF, 2, WL_SUF | DEFAULT_SIZE | MODRM, { WORD_REG | ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + { "call", 2, 0x9A, NONE, WL_SUF | DEFAULT_SIZE | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "call", 1, 0xFF, 3, INTEL_SUF | DEFAULT_SIZE | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + /* Alternative syntax. */ + { "lcall", 2, 0x9A, NONE, WL_SUF | DEFAULT_SIZE | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "lcall", 1, 0xFF, 3, WL_SUF | DEFAULT_SIZE | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + +#define PC_RELATIVE_JUMP 0xEB + + { "jmp", 1, 0xEB, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jmp", 1, 0xFF, 4, WL_SUF | MODRM, { WORD_REG | ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + { "jmp", 2, 0xEA, NONE, WL_SUF | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "jmp", 1, 0xFF, 5, INTEL_SUF | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + /* Alternative syntax. 
*/ + { "ljmp", 2, 0xEA, NONE, WL_SUF | JUMPINTERSEGMENT, { IMM16, IMM16 | IMM32, 0 }, CPU_8086 }, + { "ljmp", 1, 0xFF, 5, WL_SUF | MODRM, { ANY_MEM | JUMP_ABSOLUTE, 0, 0 }, CPU_8086 }, + + { "ret", 0, 0xC3, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "ret", 1, 0xC2, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "retf", 0, 0xCB, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "retf", 1, 0xCA, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "lret", 0, 0xCB, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + { "lret", 1, 0xCA, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, 0, 0 }, CPU_8086 }, + { "enter", 2, 0xC8, NONE, WL_SUF | DEFAULT_SIZE, { IMM16, IMM8, 0 }, CPU_186 }, + { "leave", 0, 0xC9, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_186 }, + + /* Conditional jumps. */ + { "jo", 1, 0x70, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jno", 1, 0x71, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jb", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jc", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnae", 1, 0x72, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnb", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnc", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jae", 1, 0x73, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "je", 1, 0x74, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jz", 1, 0x74, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jne", 1, 0x75, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnz", 1, 0x75, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jbe", 1, 0x76, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jna", 1, 0x76, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "ja", 1, 0x77, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnbe", 1, 0x77, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "js", 1, 0x78, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 
}, + { "jns", 1, 0x79, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jp", 1, 0x7A, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jpe", 1, 0x7A, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnp", 1, 0x7B, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jpo", 1, 0x7B, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jl", 1, 0x7C, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnge", 1, 0x7C, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jge", 1, 0x7D, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnl", 1, 0x7D, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jle", 1, 0x7E, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jng", 1, 0x7E, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jg", 1, 0x7F, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + { "jnle", 1, 0x7F, NONE, NO_SUF | JUMP, { DISP, 0, 0 }, CPU_8086 }, + + { "jcxz", 1, 0xE3, NONE, NO_SUF | JUMPBYTE | SIZE16, { DISP, 0, 0 }, CPU_8086 }, + { "jecxz", 1, 0xE3, NONE, NO_SUF | JUMPBYTE | SIZE32, { DISP, 0, 0 }, CPU_8086 }, + + /* Loop instructions. */ + { "loop", 1, 0xE2, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopz", 1, 0xE1, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loope", 1, 0xE1, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopnz", 1, 0xE0, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + { "loopne", 1, 0xE0, NONE, WL_SUF | JUMPBYTE, { DISP, 0, 0 }, CPU_8086 }, + + /* Set byte on flag instructions. 
*/ + { "seto", 1, 0x0F90, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setno", 1, 0x0F91, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setb", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setc", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnae", 1, 0x0F92, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnb", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnc", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setae", 1, 0x0F93, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "sete", 1, 0x0F94, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setz", 1, 0x0F94, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setne", 1, 0x0F95, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnz", 1, 0x0F95, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setbe", 1, 0x0F96, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setna", 1, 0x0F96, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnbe", 1, 0x0F97, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "seta", 1, 0x0F97, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "sets", 1, 0x0F98, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setns", 1, 0x0F99, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setp", 1, 0x0F9A, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setpe", 1, 0x0F9A, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnp", 1, 0x0F9B, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setpo", 1, 0x0F9B, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setl", 1, 0x0F9C, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnge", 1, 0x0F9C, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnl", 1, 0x0F9D, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setge", 1, 0x0F9D, 0, 
B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setle", 1, 0x0F9E, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setng", 1, 0x0F9E, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setnle", 1, 0x0F9F, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + { "setg", 1, 0x0F9F, 0, B_SUF | MODRM, { REG8 | ANY_MEM, 0, 0 }, CPU_386 }, + + /* String manipulation instructions. */ + { "cmps", 0, 0xA6, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "scmp", 0, 0xA6, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ins", 0, 0x6C, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_186 }, + { "outs", 0, 0x6E, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_186 }, + { "lods", 0, 0xAC, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "slod", 0, 0xAC, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "movs", 0, 0xA4, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "smov", 0, 0xA4, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "scas", 0, 0xAE, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ssca", 0, 0xAE, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "stos", 0, 0xAA, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "ssto", 0, 0xAA, NONE, BWL_SUF | W | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + { "xlat", 0, 0xD7, NONE, B_SUF | IS_STRING, { 0, 0, 0 }, CPU_8086 }, + + /* Bit manipulation instructions. 
*/ + { "bsf", 2, 0x0FBC, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "bsr", 2, 0x0FBD, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_386 }, + { "bt", 2, 0x0FA3, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bt", 2, 0x0FBA, 4, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btc", 2, 0x0FBB, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btc", 2, 0x0FBA, 7, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btr", 2, 0x0FB3, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "btr", 2, 0x0FBA, 6, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bts", 2, 0x0FAB, NONE, WL_SUF | MODRM, { WORD_REG, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + { "bts", 2, 0x0FBA, 5, WL_SUF | MODRM, { IMM8, WORD_REG | ANY_MEM, 0 }, CPU_386 }, + + /* Interrupts. */ +#define INT_OPCODE 0xCD +#define INT3_OPCODE 0xCC + + { "int", 1, 0xCD, NONE, 0, { IMM8, 0, 0 }, CPU_8086 }, + { "int3", 0, 0xCC, NONE, 0, { 0, 0, 0 }, CPU_8086 }, + { "into", 0, 0xCE, NONE, 0, { 0, 0, 0 }, CPU_8086 }, + { "iret", 0, 0xCF, NONE, WL_SUF | DEFAULT_SIZE, { 0, 0, 0 }, CPU_8086 }, + + { "rsm", 0, 0x0FAA, NONE, 0, { 0, 0, 0 }, CPU_386 }, + { "bound", 2, 0x62, NONE, 0, { WORD_REG, ANY_MEM, 0 }, CPU_186 }, + + { "hlt", 0, 0xF4, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + { "nop", 0, 0x90, NONE, NO_SUF, { 0, 0, 0 }, CPU_8086 }, + + /* Protection control. 
*/ + { "arpl", 2, 0x63, NONE, W_SUF | MODRM | IGNORE_SIZE, { REG16, REG16 | ANY_MEM, 0 }, CPU_286 }, + { "lar", 2, 0x0F02, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_286 }, + { "lgdt", 1, 0x0F01, 2, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "lidt", 1, 0x0F01, 3, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "lldt", 1, 0x0F00, 2, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "lmsw", 1, 0x0F01, 6, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "lsl", 2, 0x0F03, NONE, WL_SUF | MODRM, { WORD_REG | ANY_MEM, WORD_REG, 0 }, CPU_286 }, + { "ltr", 1, 0x0F00, 3, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + + { "sgdt", 1, 0x0F01, 0, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "sidt", 1, 0x0F01, 1, WL_SUF | MODRM, { ANY_MEM, 0, 0 }, CPU_286 }, + { "sldt", 1, 0x0F00, 0, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "sldt", 1, 0x0F00, 0, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + { "smsw", 1, 0x0F01, 4, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "smsw", 1, 0x0F01, 4, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + { "str", 1, 0x0F00, 1, WL_SUF | MODRM, { WORD_REG | INV_MEM, 0, 0 }, CPU_286 }, + { "str", 1, 0x0F00, 1, W_SUF | MODRM | IGNORE_SIZE, { ANY_MEM, 0, 0 }, CPU_286 }, + + { "verr", 1, 0x0F00, 4, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + { "verw", 1, 0x0F00, 5, W_SUF | MODRM | IGNORE_SIZE, { REG16 | ANY_MEM, 0, 0 }, CPU_286 }, + + /* Opcode prefixes. They are allowed as separate instructions too. 
*/ +#define ADDR_PREFIX_OPCODE 0x67 + + { "addr16", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "addr32", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "aword", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "adword", 0, 0x67, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + +#define DATA_PREFIX_OPCODE 0x66 + + { "data16", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "data32", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "word", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE16 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + { "dword", 0, 0x66, NONE, NO_SUF | IS_PREFIX | SIZE32 | IGNORE_SIZE, { 0, 0, 0 }, CPU_386 }, + +#define CS_PREFIX_OPCODE 0x2E + { "cs", 0, 0x2E, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define DS_PREFIX_OPCODE 0x3E + { "ds", 0, 0x3E, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define ES_PREFIX_OPCODE 0x26 + { "es", 0, 0x26, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define FS_PREFIX_OPCODE 0x64 + { "fs", 0, 0x64, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define GS_PREFIX_OPCODE 0x65 + { "gs", 0, 0x65, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, +#define SS_PREFIX_OPCODE 0x36 + { "ss", 0, 0x36, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + +#define REPNE_PREFIX_OPCODE 0xF2 +#define REPE_PREFIX_OPCODE 0xF3 + + { "repne", 0, 0xF2, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repnz", 0, 0xF2, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "rep", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repe", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + { "repz", 0, 0xF3, NONE, NO_SUF | IS_PREFIX, { 0, 0, 0 }, CPU_8086 }, + + /* i486 extensions. 
*/ + { "bswap", 1, 0x0FC8, NONE, L_SUF | SHORT_FORM, { REG32, 0, 0 }, CPU_486 }, + + /* End of instructions. */ + { 0, 0, 0, 0, 0, { 0, 0, 0 }, 0 } + +}; + +/* Prefixes are emitted in the following order. */ +#define SEGMENT_PREFIX 0x00 +#define ADDR_PREFIX 0x01 +#define DATA_PREFIX 0x02 +#define REP_PREFIX 0x03 + +#define MAX_PREFIXES 0x04 + +static struct reg_entry reg_table[] = { + + /* 8 bit registers. */ + { "al", REG8 | ACC, 0 }, + { "cl", REG8 | SHIFT_COUNT, 1 }, + { "dl", REG8, 2 }, + { "bl", REG8, 3 }, + { "ah", REG8, 4 }, + { "ch", REG8, 5 }, + { "dh", REG8, 6 }, + { "bh", REG8, 7 }, + + /* 16 bit registers. */ + { "ax", REG16 | ACC, 0 }, + { "cx", REG16, 1 }, + { "dx", REG16 | PORT, 2 }, + { "bx", REG16 | BASE_INDEX, 3 }, + { "sp", REG16, 4 }, + { "bp", REG16 | BASE_INDEX, 5 }, + { "si", REG16 | BASE_INDEX, 6 }, + { "di", REG16 | BASE_INDEX, 7 }, + + /* 32 bit registers. */ + { "eax", REG32 | BASE_INDEX | ACC, 0 }, + { "ecx", REG32 | BASE_INDEX, 1 }, + { "edx", REG32 | BASE_INDEX, 2 }, + { "ebx", REG32 | BASE_INDEX, 3 }, + { "esp", REG32, 4 }, + { "ebp", REG32 | BASE_INDEX, 5 }, + { "esi", REG32 | BASE_INDEX, 6 }, + { "edi", REG32 | BASE_INDEX, 7 }, + + /* Segment registers. */ + { "es", SEGMENT1, 0 }, + { "cs", SEGMENT1, 1 }, + { "ss", SEGMENT1, 2 }, + { "ds", SEGMENT1, 3 }, + { "fs", SEGMENT2, 4 }, + { "gs", SEGMENT2, 5 }, + + /* Segment pseudo-register. */ + { "flat", SEGMENT1, REG_FLAT_NUMBER }, + + /* Control registers. */ + { "cr0", CONTROL, 0 }, + { "cr1", CONTROL, 1 }, + { "cr2", CONTROL, 2 }, + { "cr3", CONTROL, 3 }, + { "cr4", CONTROL, 4 }, + { "cr5", CONTROL, 5 }, + { "cr6", CONTROL, 6 }, + { "cr7", CONTROL, 7 }, + + /* Debug registers. */ + { "db0", DEBUG, 0 }, + { "db1", DEBUG, 1 }, + { "db2", DEBUG, 2 }, + { "db3", DEBUG, 3 }, + { "db4", DEBUG, 4 }, + { "db5", DEBUG, 5 }, + { "db6", DEBUG, 6 }, + { "db7", DEBUG, 7 }, + + /* Other naming. 
*/ + { "dr0", DEBUG, 0 }, + { "dr1", DEBUG, 1 }, + { "dr2", DEBUG, 2 }, + { "dr3", DEBUG, 3 }, + { "dr4", DEBUG, 4 }, + { "dr5", DEBUG, 5 }, + { "dr6", DEBUG, 6 }, + { "dr7", DEBUG, 7 }, + + /* Test registers. */ + { "tr0", TEST, 0 }, + { "tr1", TEST, 1 }, + { "tr2", TEST, 2 }, + { "tr3", TEST, 3 }, + { "tr4", TEST, 4 }, + { "tr5", TEST, 5 }, + { "tr6", TEST, 6 }, + { "tr7", TEST, 7 }, + + /* End of registers. */ + { 0, 0, 0 } + +}; + +#define BYTE_SUFFIX 'b' +#define WORD_SUFFIX 'w' +#define SHORT_SUFFIX 's' +#define DWORD_SUFFIX 'l' +#define QWORD_SUFFIX 'q' + +/* Internal suffix for .intel_syntax. It cannot be directly used by the user. */ +#define INTEL_SUFFIX '\1' + +struct templates *machine_dependent_find_templates (char *name, int check_suffix) { + + char *lname; + + struct hashtab_name *key; + struct templates *entry; + + lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_templates, lname))) { + + if ((entry = hashtab_get (&hashtab_templates, key))) { + + free (lname); + return entry; + + } + + } + + if (check_suffix) { + + char *p2 = lname + strlen (lname); + + switch (p2[-1]) { + + case WORD_SUFFIX: + case BYTE_SUFFIX: + case QWORD_SUFFIX: + + p2[-1] = '\0'; + break; + + case SHORT_SUFFIX: + case DWORD_SUFFIX: + + if (!intel_syntax) { + p2[-1] = '\0'; + } + + break; + + /* Intel syntax only. 
*/ + case 'd': + + if (intel_syntax) { + p2[-1] = '\0'; + } + + break; + + } + + if ((key = hashtab_get_key (&hashtab_templates, lname))) { + + if ((entry = hashtab_get (&hashtab_templates, key))) { + + free (lname); + return entry; + + } + + } + + } + + free (lname); + return 0; + +} + +struct reg_entry *machine_dependent_find_reg_entry (char *name) { + + char *lname; + + struct hashtab_name *key; + struct reg_entry *entry; + + lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_regs, lname))) { + + if ((entry = hashtab_get (&hashtab_regs, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +#define EXPR_TYPE_SHORT EXPR_TYPE_MACHINE_DEPENDENT_0 +#define EXPR_TYPE_OFFSET EXPR_TYPE_MACHINE_DEPENDENT_1 + +#define EXPR_TYPE_FULL_PTR EXPR_TYPE_MACHINE_DEPENDENT_2 +#define EXPR_TYPE_NEAR_PTR EXPR_TYPE_MACHINE_DEPENDENT_3 +#define EXPR_TYPE_FAR_PTR EXPR_TYPE_MACHINE_DEPENDENT_4 + +#define EXPR_TYPE_BYTE_PTR EXPR_TYPE_MACHINE_DEPENDENT_5 +#define EXPR_TYPE_WORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_6 +#define EXPR_TYPE_DWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_7 +#define EXPR_TYPE_FWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_8 +#define EXPR_TYPE_QWORD_PTR EXPR_TYPE_MACHINE_DEPENDENT_9 + +struct modrm_byte { + + unsigned int regmem; + unsigned int reg; + unsigned int mode; + +}; + +struct sib_byte { + + unsigned int base; + unsigned int index; + unsigned int scale; + +}; + +#define MODRM_REGMEM_TWO_BYTE_ADDRESSING 0x04 +#define SIB_BASE_NO_BASE_REGISTER 0x05 +#define SIB_BASE_NO_BASE_REGISTER_16 0x06 +#define SIB_INDEX_NO_INDEX_REGISTER 0x04 + +static struct { + + enum expr_type operand_modifier; + + int is_mem; + int is_indirect; + int has_offset; + + int in_offset; + int in_bracket; + int in_scale; + + struct reg_entry *base_reg; + struct reg_entry *index_reg; + + signed long scale_factor; + struct symbol *segment; + +} intel_state; + +struct instruction { + + struct template template; + char suffix; + + unsigned int 
log2_scale_factor; + int force_short_jump; + + int operands; + int reg_operands; + int disp_operands; + int mem_operands; + + unsigned int prefixes[MAX_PREFIXES]; + int prefix_count; + + struct modrm_byte modrm; + struct sib_byte sib; + + struct reg_entry *base_reg, *index_reg; + unsigned int types[MAX_OPERANDS]; + + struct expr *imms[MAX_OPERANDS]; + struct expr *disps[MAX_OPERANDS]; + + struct reg_entry *regs[MAX_OPERANDS]; + struct reg_entry *segments[MAX_OPERANDS]; + +}; + +static struct expr operand_exprs[MAX_OPERANDS]; +static struct instruction instruction; + +static struct templates *current_templates = 0; +static int operand_exprs_count; + +static void machine_dependent_set_march (const char *optarg) { + + char *arg, *orig_arg; + char *next; + + arg = orig_arg = xstrdup (optarg); + + if (cpu_extensions_name == 0) { + cpu_extensions_name = xstrdup (""); + } + + if (*arg == '+') { + ++arg; + } + + while (1) { + + unsigned long i; + char saved_c; + + next = strchr (arg, '+'); + + if (next == 0) { + next = arg + strlen (arg); + } + + saved_c = *next; + *next = '\0'; + + for (i = 0; i < ARRAY_SIZE (cpu_archs); i++) { + + if (strcmp (arg, cpu_archs[i].name) == 0) { + + cpu_arch_flags = cpu_archs[i].cpu_flags; + + free (cpu_arch_name); + cpu_arch_name = xstrdup (arg); + + free (cpu_extensions_name); + cpu_extensions_name = xstrdup (""); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_archs)) { + + for (i = 0; i < ARRAY_SIZE (cpu_extensions); i++) { + + if (strcmp (arg, cpu_extensions[i].name) == 0) { + + cpu_arch_flags |= cpu_extensions[i].cpu_flags; + cpu_extensions_name = xrealloc (cpu_extensions_name, strlen (cpu_extensions_name) + 1 + 1 + strlen (cpu_extensions[i].name)); + + strcat (cpu_extensions_name, "."); + strcat (cpu_extensions_name, cpu_extensions[i].name); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_extensions)) { + + for (i = 0; i < ARRAY_SIZE (cpu_no_extensions); i++) { + + if (strcmp (arg, cpu_no_extensions[i].name) == 0) { + + 
cpu_arch_flags &= ~cpu_no_extensions[i].cpu_flags; + cpu_extensions_name = xrealloc (cpu_extensions_name, strlen (cpu_extensions_name) + 1 + 1 + strlen (cpu_no_extensions[i].name)); + + strcat (cpu_extensions_name, "."); + strcat (cpu_extensions_name, cpu_no_extensions[i].name); + + break; + + } + + } + + if (i == ARRAY_SIZE (cpu_no_extensions)) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid march option: '%s'", optarg); + } + + } + + } + + if (saved_c == '\0') { + break; + } + + *next = saved_c; + arg = next + 1; + + } + + free (orig_arg); + +} + + +static void handler_8086 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i8086"); + +} + +static void handler_8087 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i8086+8087"); + +} + +static void handler_186 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i186"); + +} + +static void handler_286 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i286"); + +} + +static void handler_287 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i286+287"); + +} + +static void handler_386 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i386"); + +} + +static void handler_387 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i386+387"); + +} + +static void handler_486 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i486"); + +} + +static void handler_586 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i586"); + +} + +static void handler_686 (char *start, char **pp) { + + (void) start; + (void) pp; + + machine_dependent_set_march ("i686"); + +} + +static void handler_extern (char *start, char **pp) { + + struct symbol 
*symbol; + char *name, *caret, *qualifier; + + for (;;) { + + caret = (*pp = skip_whitespace (*pp)); + + if (!(name = symname (pp))) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "expected symbol name"); + + ignore_rest_of_line (pp); + return; + + } + + if ((symbol = symbol_find (name))) { + + if (symbol->scope == SYMBOL_SCOPE_LOCAL || symbol->scope == SYMBOL_SCOPE_GLOBAL) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol->scope = SYMBOL_SCOPE_EXTERN; + symbol_set_external (symbol); + + } + + } else { + + symbol = symbol_make (name); + symbol_add_to_chain (symbol); + + symbol->scope = SYMBOL_SCOPE_EXTERN; + symbol_set_external (symbol); + + } + + *pp = skip_whitespace (*pp); + free (name); + + if (**pp == ':') { + + *pp = skip_whitespace (*pp + 1); + + if (!(qualifier = symname (pp))) { + error: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid qualifier type"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (qualifier, "byte") && xstrcasecmp (qualifier, "word") && xstrcasecmp (qualifier, "dword")) { + + free (qualifier); + goto error; + + } + + free (qualifier); + + } + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + +static void handler_model (char *start, char **pp) { + + char *caret = (*pp = skip_whitespace (*pp)); + char *model, *lang; + + if (!(model = symname (pp))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "memory model is not found"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (model, "tiny") == 0) { + + state->data_size = 0; + state->model = 1; + + } else if (xstrcasecmp (model, "small") == 0) { + + state->data_size = 0; + state->model = 2; + + } else if (xstrcasecmp (model, "compact") == 0) { + + state->data_size = 1; + state->model = 3; + + } else 
if (xstrcasecmp (model, "medium") == 0) { + + state->data_size = 0; + state->model = 4; + + } else if (xstrcasecmp (model, "large") == 0) { + + state->data_size = 1; + state->model = 5; + + } else if (xstrcasecmp (model, "huge") == 0) { + + state->data_size = 2; + state->model = 6; + + } else if (xstrcasecmp (model, "flat") == 0) { + + state->data_size = 0; + state->model = 7; + + if ((cpu_arch_flags & CPU_386)) { + bits = 32; + } + + } else { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "invalid memory model '%s' provided", model); + free (model); + + ignore_rest_of_line (pp); + return; + + } + + free (model); + + if (**pp != ',') { + return; + } + + caret = (*pp = skip_whitespace (*pp + 1)); + + if (!(lang = symname (pp))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no language specified"); + + ignore_rest_of_line (pp); + return; + + } + + *pp = skip_whitespace (*pp); + + if (xstrcasecmp (lang, "c") == 0) { + + state->ext = "_"; + + free (lang); + return; + + } + + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "invalid language '%s' provided", lang); + free (lang); + + ignore_rest_of_line (pp); + +} + +static struct pseudo_op_entry pseudo_op_table[] = { + + { ".8086", &handler_8086 }, + { ".8087", &handler_8087 }, + + { ".186", &handler_186 }, + + { ".286", &handler_286 }, + { ".286p", &handler_286 }, + { ".287", &handler_287 }, + + { ".386", &handler_386 }, + { ".386p", &handler_386 }, + { ".387", &handler_387 }, + + { ".486", &handler_486 }, + { ".486p", &handler_486 }, + + { ".586", &handler_586 }, + { ".686", &handler_686 }, + + { "extern", &handler_extern }, + { "extrn", &handler_extern }, + + { ".model", &handler_model }, + { 0, 0 } + +}; + +void machine_dependent_init (void) { + + struct hashtab_name *key; + + struct reg_entry *reg_entry; + struct templates *templates; + + struct template *template = template_table; + int ch; + + templates = xmalloc 
(sizeof (*templates)); + templates->name = xstrdup (template->name); + templates->start = template; + + for (;;) { + + template++; + + if (!template->name || strcmp (template->name, (template - 1)->name)) { + + templates->end = template; + + if (hashtab_get_key (&hashtab_templates, templates->name)) { + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", templates->name); + } else { + + if (!(key = hashtab_alloc_name (templates->name))) { + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", templates->name); + } else { + hashtab_put (&hashtab_templates, key, templates); + } + + } + + if (!template->name) { + break; + } + + templates = xmalloc (sizeof (*templates)); + templates->name = xstrdup (template->name); + templates->start = template; + + } + + } + + for (reg_entry = reg_table; reg_entry->name; reg_entry++) { + + if ((reg_entry->type & REG32) && reg_entry->number == 4) { + reg_esp = reg_entry; + } + + if (reg_entry->type & SEGMENT1) { + + switch (reg_entry->number) { + + case 2: + + reg_ss = reg_entry; + break; + + case 3: + + reg_ds = reg_entry; + break; + + } + + } + + if (hashtab_get_key (&hashtab_regs, reg_entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", reg_entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (reg_entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", reg_entry->name); + continue; + + } + + hashtab_put (&hashtab_regs, key, reg_entry); + + } + + for (ch = 0; ch < 255; ch++) { + + if (islower (ch) || isdigit (ch)) { + register_chars_table[ch] = ch; + } else if (isupper (ch)) { + register_chars_table[ch] = tolower (ch); + } + + } + + expr_type_set_rank (EXPR_TYPE_FULL_PTR, intel_syntax ? 
10 : 0); + + machine_dependent_set_march ("i8086"); + install_pseudo_op_table (pseudo_op_table); + +} + +void machine_dependent_handle_proc (char *start, char **pp, char *name) { + + struct symbol *symbol; + struct proc *proc; + + (void) pp; + + proc = xmalloc (sizeof (*proc)); + proc->name = xstrdup (name); + + symbol = symbol_label (start, skip_whitespace (start), name); + symbol->scope = SYMBOL_SCOPE_LOCAL; + + proc->filename = xstrdup (get_filename ()); + proc->line_number = get_line_number (); + + vec_push (&state->procs, (void *) proc); + +} + +void machine_dependent_handle_endp (char *start, char **pp, char *name) { + + struct proc *proc; + int last; + + (void) pp; + + if (state->procs.length == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "block nesting error"); + return; + + } + + last = state->procs.length - 1; + proc = state->procs.data[last]; + + if (strcmp (proc->name, name)) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start), "procedure name does not match"); + return; + + } + + free (proc->name); + free (proc->filename); + + vec_pop (&state->procs); + +} + + +static struct reg_entry bad_register = { "", 0, 0 }; + +static int check_reg (struct reg_entry *reg) { + + if (!(cpu_arch_flags & CPU_386) && (reg->type & (REG32 | SEGMENT2 | CONTROL | DEBUG))) { + return 0; + } + + if ((reg->type & SEGMENT1) && reg->number == REG_FLAT_NUMBER && !intel_syntax) { + return 0; + } + + return 1; + +} + +/** + * Returns 0 when the new prefix is of the same type as already present prefixes, + * 2 when REPE or REPNE prefix is added and 1 when other prefix is added. 
+ */ +static int add_prefix (unsigned char prefix) { + + unsigned int prefix_type; + int ret = 1; + + switch (prefix) { + + case CS_PREFIX_OPCODE: + case DS_PREFIX_OPCODE: + case ES_PREFIX_OPCODE: + case FS_PREFIX_OPCODE: + case GS_PREFIX_OPCODE: + case SS_PREFIX_OPCODE: + + prefix_type = SEGMENT_PREFIX; + break; + + case REPNE_PREFIX_OPCODE: + case REPE_PREFIX_OPCODE: + + prefix_type = REP_PREFIX; + + ret = 2; + break; + + case ADDR_PREFIX_OPCODE: + + prefix_type = ADDR_PREFIX; + break; + + case DATA_PREFIX_OPCODE: + + prefix_type = DATA_PREFIX; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "add_prefix invalid case %i", prefix); + exit (EXIT_FAILURE); + + } + + if (instruction.prefixes[prefix_type]) { + ret = 0; + } + + if (ret) { + + instruction.prefix_count++; + instruction.prefixes[prefix_type] = prefix; + + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "same type of prefix used twice"); + } + + return (ret); + +} + + +static struct reg_entry *parse_register (const char *reg_string, char **end_pp) { + + struct reg_entry *reg; + + char *p, *p_into_reg_name_cleaned; + char reg_name_cleaned[MAX_REG_NAME_SIZE + 1]; + + p = skip_whitespace ((char *) reg_string); + + for (p_into_reg_name_cleaned = reg_name_cleaned; (*(p_into_reg_name_cleaned++) = register_chars_table[(int) *p]) != '\0'; p++) { + + if (p_into_reg_name_cleaned >= reg_name_cleaned + MAX_REG_NAME_SIZE) { + return 0; + } + + } + + if (is_name_part ((int) *p)) { + return 0; + } + + reg = machine_dependent_find_reg_entry (reg_name_cleaned); + *end_pp = p; + + if (!reg) { + return 0; + } else if (check_reg (reg)) { + return reg; + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "register %s cannot be used here", reg->name); + return &bad_register; + +} + +static char *parse_instruction (char *line) { + + const char *expecting_string_instruction = 0; + const struct template *template; + + char *p2; + char saved_ch; + + 
current_templates = 0; + + while (1) { + + p2 = line = skip_whitespace (line); + + /* Lower-case the token in place; the byte is converted to unsigned char first so tolower never receives a negative value. */ + while ((*p2 != ' ') && (*p2 != '\t') && (*p2 != '\0')) { + + *p2 = tolower ((unsigned char) *p2); + p2++; + + } + + saved_ch = *p2; + *p2 = '\0'; + + if (line == p2) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expecting mnemonic; got nothing"); + return (line); + + } + + current_templates = machine_dependent_find_templates (line, 0); + + if (saved_ch && (*skip_whitespace (p2 + 1)) && current_templates && (current_templates->start->opcode_modifier & IS_PREFIX)) { + + /* A size prefix is reported as redundant when (SIZE32 flag set) differs from (bits == 16). 'bits' is only compared here and must never be written. */ + if ((current_templates->start->opcode_modifier & (SIZE16 | SIZE32)) && (((current_templates->start->opcode_modifier & SIZE32) != 0) ^ (bits == 16))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "redundant %s prefix", current_templates->name); + return 0; + + } + + switch (add_prefix (current_templates->start->base_opcode)) { + + case 0: + + return 0; + + case 2: + + expecting_string_instruction = current_templates->name; + break; + + } + + *p2 = saved_ch; + line = p2 + 1; + + } else { + break; + } + + } + + if (current_templates == 0) { + + switch (p2[-1]) { + + case WORD_SUFFIX: + case BYTE_SUFFIX: + case QWORD_SUFFIX: + + instruction.suffix = p2[-1]; + p2[-1] = '\0'; + + break; + + case SHORT_SUFFIX: + case DWORD_SUFFIX: + + if (!intel_syntax) { + + instruction.suffix = p2[-1]; + p2[-1] = '\0'; + + } + + break; + + /* Intel syntax only. 
*/ + case 'd': + + if (intel_syntax) { + + instruction.suffix = DWORD_SUFFIX; + p2[-1] = '\0'; + + } + + break; + + default: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no such instruction '%s'", line); + return 0; + + } + + current_templates = machine_dependent_find_templates (line, 0); + + if (current_templates == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no such instruction '%s'", line); + return 0; + + } + } + + if (expecting_string_instruction) { + + if (!(current_templates->start->opcode_modifier & IS_STRING)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expecting string instruction after '%s'", expecting_string_instruction); + return 0; + + } + + } + + for (template = current_templates->start; template < current_templates->end; template++) { + + if (template->cpu_flags == 0 || (template->cpu_flags & cpu_arch_flags)) { + goto end; + } + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%s' is not supported on '%s%s'", + current_templates->name, cpu_arch_name ? cpu_arch_name : DEFAULT_CPU_ARCH_NAME, cpu_extensions_name ? cpu_extensions_name : ""); + + return 0; + +end: + + *p2 = saved_ch; + line = p2; + + return (line); + +} + + +static int intel_simplify_expr (struct expr *expr); + +static int finalize_immediate (struct expr *expr, const char *imm_start) { + + if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) { + + if (imm_start) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing or invalid immediate expression '%s'", imm_start); + } + + return 1; + + } else if (expr->type == EXPR_TYPE_CONSTANT) { + + /* Size will be determined later. */ + instruction.types[instruction.operands] |= IMM16; + + } else { + + /* It is an address and size will determined later. 
*/ + instruction.types[instruction.operands] = IMM8 | IMM16; + + } + + return 0; + +} + +static int finalize_displacement (struct expr *expr, const char *disp_start) { + + if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) { + + if (disp_start) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing or invalid displacement expression '%s'", disp_start); + } + + return 1; + + } + + return 0; + +} + +static int base_index_check (char *operand_string) { + + if (bits == 32) { + + if ((instruction.base_reg && !(instruction.base_reg->type & REG32)) + || (instruction.index_reg && (!(instruction.index_reg->type & BASE_INDEX) || !(instruction.index_reg->type & REG32)))) { + bad: + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%s' is not a valid base/index expression", operand_string); + return 1; + + } + + } else { + + if ((instruction.base_reg && (!(instruction.base_reg->type & BASE_INDEX) || !(instruction.base_reg->type & REG16))) + || (instruction.index_reg && (!(instruction.index_reg->type & BASE_INDEX) || !(instruction.index_reg->type & REG16) + || !(instruction.base_reg && instruction.base_reg->number < 6 && instruction.index_reg->number >= 6 + && instruction.log2_scale_factor == 0)))) { + goto bad; + } + + } + + return 0; + +} + +static int intel_parse_operand (char *start, char *operand_string) { + + int ret; + + struct expr expr_buf, *expr; + char *operand_start; + + memset (&intel_state, 0, sizeof (intel_state)); + intel_state.operand_modifier = EXPR_TYPE_ABSENT; + + expr = &expr_buf; + operand_start = operand_string; + + intel_syntax = -1; + expression_read_into (start, &operand_string, expr); + + ret = intel_simplify_expr (expr); + intel_syntax = 1; + + operand_string = skip_whitespace (operand_string); + + if (*operand_string) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "junk '%s' after expression", operand_string); + return 1; + + } else if (!intel_state.has_offset && 
operand_string > operand_start && strrchr (operand_start, ']') && skip_whitespace (strrchr (operand_start, ']') + 1) == operand_string) { + + intel_state.is_mem |= 1; + intel_state.is_indirect = 1; + + } + + if (!ret) { + return 1; + } + + ret = 0; + + if (intel_state.operand_modifier != EXPR_TYPE_ABSENT && current_templates->start->base_opcode != 0x8D /* lea */) { + + char suffix = 0; + + switch (intel_state.operand_modifier) { + + case EXPR_TYPE_BYTE_PTR: + + suffix = BYTE_SUFFIX; + break; + + case EXPR_TYPE_WORD_PTR: + + suffix = WORD_SUFFIX; + break; + + case EXPR_TYPE_DWORD_PTR: + + if (bits != 32 && ((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL))) { + suffix = INTEL_SUFFIX; + } else { + suffix = DWORD_SUFFIX; + } + + break; + + case EXPR_TYPE_FWORD_PTR: + + /* lgdt, lidt, sgdt, sidt accept fword ptr but ignore it. */ + if ((current_templates->name[0] == 'l' || current_templates->name[0] == 's') && (current_templates->name[1] == 'g' || current_templates->name[1] == 'i') && current_templates->name[2] == 'd' && current_templates->name[3] == 't' && current_templates->name[4] == '\0') { + break; + } + + if (bits == 16) { + add_prefix (DATA_PREFIX_OPCODE); + } + + suffix = INTEL_SUFFIX; + break; + + case EXPR_TYPE_QWORD_PTR: + + suffix = QWORD_SUFFIX; + break; + + case EXPR_TYPE_FAR_PTR: + + suffix = INTEL_SUFFIX; + break; + + default: + + break; + + } + + /* The first modifier seen fixes instruction.suffix; a later one that maps to a different suffix is rejected. */ + if (!instruction.suffix) { + instruction.suffix = suffix; + } else if (instruction.suffix != suffix) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "conflicting operand size modifiers"); + return 1; + + } + + } + + if ((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL) || (current_templates->start->opcode_modifier & JUMPINTERSEGMENT)) { + + int is_absolute_jump = 0; + + if (instruction.regs[instruction.operands] || intel_state.base_reg || intel_state.index_reg || intel_state.is_mem 
> 1) { + is_absolute_jump = 1; + } else { + + switch (intel_state.operand_modifier) { + + case EXPR_TYPE_NEAR_PTR: + + if (intel_state.segment) { + is_absolute_jump = 1; + } else { + intel_state.is_mem = 1; + } + + break; + + case EXPR_TYPE_FAR_PTR: + case EXPR_TYPE_ABSENT: + + if (!intel_state.segment) { + + intel_state.is_mem = 1; + + if (intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + + if (intel_state.is_indirect) { + is_absolute_jump = 1; + } + + break; + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "cannot infer the segment part of the operand"); + return 1; + + } else if (symbol_get_section (intel_state.segment) == reg_section) { + is_absolute_jump = 1; + } else { + + /* Something like "jmp 12:34" must be converted into "jmp 12, 34". */ + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + memset (instruction.imms[instruction.operands], 0, sizeof (*instruction.imms[instruction.operands])); + + instruction.imms[instruction.operands]->type = EXPR_TYPE_SYMBOL; + instruction.imms[instruction.operands]->add_symbol = intel_state.segment; + + resolve_expression (instruction.imms[instruction.operands]); + + if (finalize_immediate (instruction.imms[instruction.operands], operand_start)) { + return 1; + } + + instruction.operands++; + + if (instruction.suffix == INTEL_SUFFIX) { + instruction.suffix = 0; + } + + intel_state.segment = 0; + intel_state.is_mem = 0; + + } + + break; + + default: + + is_absolute_jump = 1; + break; + + } + + } + + if (is_absolute_jump) { + + instruction.types[instruction.operands] |= JUMP_ABSOLUTE; + intel_state.is_mem |= 1; + + } + + } + + if (instruction.regs[instruction.operands]) { + + instruction.types[instruction.operands] |= instruction.regs[instruction.operands]->type & ~BASE_INDEX; + instruction.reg_operands++; + + } else if (intel_state.base_reg || intel_state.index_reg || intel_state.segment || intel_state.is_mem) { + + if (instruction.mem_operands >= 1) { + + /** + * 
Handles "call 0x9090, 0x9090", "lcall 0x9090, 0x9090", + * "jmp 0x9090, 0x9090", "ljmp 0x9090, 0x9090". + */ + if (((current_templates->start->opcode_modifier & JUMP) || (current_templates->start->opcode_modifier & CALL) || (current_templates->start->opcode_modifier & JUMPINTERSEGMENT)) && instruction.operands == 1 && instruction.mem_operands == 1 && instruction.disp_operands == 1 && intel_state.segment == 0 && intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + + instruction.operands = 0; + + if (!finalize_immediate (instruction.disps[instruction.operands], 0)) { + + instruction.imms[instruction.operands] = instruction.disps[instruction.operands]; + instruction.operands = 1; + + operand_exprs[operand_exprs_count] = *expr; + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + + resolve_expression (instruction.imms[instruction.operands]); + + if (!finalize_immediate (instruction.imms[instruction.operands], operand_start)) { + + instruction.mem_operands = 0; + instruction.disp_operands = 0; + + instruction.operands = 2; + instruction.types[0] &= ~ANY_MEM; + + return 0; + + } + + } + + } + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "too many memory references for '%s'", current_templates->name); + return 1; + + } + + if (intel_state.base_reg && intel_state.index_reg && (intel_state.base_reg->type & REG16) && (intel_state.index_reg->type & REG16) && intel_state.base_reg->number >= 6 && intel_state.index_reg->number < 6) { + + /* Converts [si + bp] to [bp + si] as addition is commutative but other code accepts only (%bp,%si), not (%si,%bp). 
*/ + instruction.base_reg = intel_state.index_reg; + instruction.index_reg = intel_state.base_reg; + + } else { + + instruction.base_reg = intel_state.base_reg; + instruction.index_reg = intel_state.index_reg; + + } + + if (instruction.base_reg || instruction.index_reg) { + instruction.types[instruction.operands] |= BASE_INDEX; + } + + operand_exprs[operand_exprs_count] = *expr; + expr = &operand_exprs[operand_exprs_count++]; + + resolve_expression (expr); + + if (expr->type != EXPR_TYPE_CONSTANT || expr->add_number || !(instruction.types[instruction.operands] & BASE_INDEX)) { + + instruction.disps[instruction.operands] = expr; + instruction.disp_operands++; + + instruction.types[instruction.operands] |= DISP16; + + if (finalize_displacement (instruction.disps[instruction.operands], operand_start)) { + return 1; + } + + } + + if (intel_state.segment) { + + int more_than_1_segment = 0; + + while (1) { + + expr = symbol_get_value_expression (intel_state.segment); + + if (expr->type != EXPR_TYPE_FULL_PTR || symbol_get_value_expression (expr->op_symbol)->type != EXPR_TYPE_REGISTER) { + break; + } + + intel_state.segment = expr->add_symbol; + more_than_1_segment = 1; + + } + + if (expr->type != EXPR_TYPE_REGISTER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "segment register name expected"); + return 1; + + } + + if ((reg_table[expr->add_number].type & (SEGMENT1 | SEGMENT2)) == 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 1; + + } + + if (more_than_1_segment) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "redundant segment overrides"); + } + + /* A register numbered REG_FLAT_NUMBER is stored as a null override; any other segment register is stored as the address of its reg_table entry. */ + if (reg_table[expr->add_number].number == REG_FLAT_NUMBER) { + instruction.segments[instruction.operands] = 0; + } else { + instruction.segments[instruction.operands] = &reg_table[expr->add_number]; + } + + } + + if (base_index_check (operand_start)) { + return 1; + } + + instruction.mem_operands++; + + } else { + + 
operand_exprs[operand_exprs_count] = *expr; + instruction.imms[instruction.operands] = &operand_exprs[operand_exprs_count++]; + + resolve_expression (instruction.imms[instruction.operands]); + ret = finalize_immediate (instruction.imms[instruction.operands], operand_start); + + } + + instruction.operands++; + return ret; + +} + +static int parse_operands (char *start, char **p_line) { + + char *line = *p_line; + + while (*line != '\0') { + + char *token_start; + int skipped_comma = 0; + + line = skip_whitespace (line); + token_start = line; + + while ((*line != ',')) { + + if (*line == '\0') { + break; + } else if (line[0] == '\'' && line[1] == ',' && !skipped_comma) { + + line += 2; + + skipped_comma = 1; + continue; + + } + + line++; + + } + + if (token_start != line) { + + int ret = 1; + char saved_ch; + + saved_ch = *line; + *line = '\0'; + + ret = intel_parse_operand (start, token_start); + *line = saved_ch; + + if (ret) { + + *p_line = line; + return 1; + + } + + } + + if (line[0] == '#' || (line[0] == '/' && line[1] == '/')) { + break; + } + + if (line[0] == '/' && line[1] == '*') { + + while (*line) { + + if (line[0] == '*' && line[1] == '/') { + + line += 2; + break; + + } + + line++; + + } + + } + + if (*line == ',') { line++; } + + } + + *p_line = line; + return 0; + +} + +static int intel_simplify_symbol (struct symbol *symbol) { + + int ret = intel_simplify_expr (symbol_get_value_expression (symbol)); + + if (ret == 2) { + + symbol_set_section (symbol, absolute_section); + ret = 1; + + } + + return ret; + +} + +static void intel_fold_symbol_into_expr (struct expr *expr, struct symbol *symbol) { + + struct expr *symbol_expr = symbol_get_value_expression (symbol); + + if (symbol_get_section (symbol) == absolute_section) { + + signed int add_number = expr->add_number; + + *expr = *symbol_expr; + expr->add_number += add_number; + + } else { + + expr->type = EXPR_TYPE_SYMBOL; + expr->add_symbol = symbol; + expr->op_symbol = 0; + + } + +} + +static int 
intel_process_register_expr (struct expr *expr) { + + int reg_num = expr->add_number; + + if (intel_state.in_offset || instruction.operands < 0) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_state.in_bracket) { + + if (instruction.regs[instruction.operands]) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if ((reg_table[reg_num].type & SEGMENT1) && reg_table[reg_num].number == REG_FLAT_NUMBER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of pseudo-register"); + return 0; + + } + + instruction.regs[instruction.operands] = reg_table + reg_num; + + } else if (!intel_state.base_reg && !intel_state.in_scale) { + intel_state.base_reg = reg_table + reg_num; + } else if (!intel_state.index_reg) { + intel_state.index_reg = reg_table + reg_num; + } else { + intel_state.index_reg = 0; + } + + return 2; + +} + +static int intel_simplify_expr (struct expr *expr) { + + int ret; + + switch (expr->type) { + + case EXPR_TYPE_INDEX: + + if (expr->add_symbol) { + + if (!intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + } + + if (!intel_state.in_offset) { + intel_state.in_bracket++; + } + + ret = intel_simplify_symbol (expr->op_symbol); + + if (!intel_state.in_offset) { + intel_state.in_bracket--; + } + + if (!ret) { + return 0; + } + + if (expr->add_symbol) { + expr->type = EXPR_TYPE_ADD; + } else { + intel_fold_symbol_into_expr (expr, expr->op_symbol); + } + + break; + + case EXPR_TYPE_OFFSET: + + intel_state.has_offset = 1; + + intel_state.in_offset++; + ret = intel_simplify_symbol (expr->add_symbol); + intel_state.in_offset--; + + if (!ret) { + return 0; + } + + intel_fold_symbol_into_expr (expr, expr->add_symbol); + return ret; + + case EXPR_TYPE_MULTIPLY: + + if (intel_state.in_bracket) { + + struct expr *scale_expr = 0; + + if (!intel_state.in_scale++) { + 
intel_state.scale_factor = 1; + } + + ret = intel_simplify_symbol (expr->add_symbol); + + if (ret && intel_state.index_reg) { + scale_expr = symbol_get_value_expression (expr->op_symbol); + } + + if (ret) { + ret = intel_simplify_symbol (expr->op_symbol); + } + + if (ret && !scale_expr && intel_state.index_reg) { + scale_expr = symbol_get_value_expression (expr->add_symbol); + } + + if (ret && scale_expr) { + + resolve_expression (scale_expr); + + if (scale_expr->type != EXPR_TYPE_CONSTANT) { + scale_expr->add_number = 0; + } + + intel_state.scale_factor *= scale_expr->add_number; + + } + + intel_state.in_scale--; + if (!ret ) { return 0; } + + if (!intel_state.in_scale) { + + switch (intel_state.scale_factor) { + + case 1: + + instruction.log2_scale_factor = 0; + break; + + case 2: + + instruction.log2_scale_factor = 1; + break; + + case 4: + + instruction.log2_scale_factor = 2; + break; + + case 8: + + instruction.log2_scale_factor = 3; + break; + + default: + + intel_state.index_reg = 0; + break; + + } + + } + + break; + + } + + goto default_; + + case EXPR_TYPE_SHORT: + + instruction.force_short_jump = 1; + goto ptr_after_setting_operand_modifier; + + case EXPR_TYPE_BYTE_PTR: + case EXPR_TYPE_WORD_PTR: + case EXPR_TYPE_DWORD_PTR: + case EXPR_TYPE_FWORD_PTR: + case EXPR_TYPE_QWORD_PTR: + case EXPR_TYPE_NEAR_PTR: + case EXPR_TYPE_FAR_PTR: + + if (intel_state.operand_modifier == EXPR_TYPE_ABSENT) { + intel_state.operand_modifier = expr->type; + } + + ptr_after_setting_operand_modifier: + + if (symbol_get_value_expression (expr->add_symbol)->type == EXPR_TYPE_REGISTER) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + intel_fold_symbol_into_expr (expr, expr->add_symbol); + break; + + case EXPR_TYPE_FULL_PTR: + + if (symbol_get_value_expression (expr->op_symbol)->type == EXPR_TYPE_REGISTER) { + + report_at (get_filename (), 
get_line_number (), REPORT_ERROR, "invalid use of register"); + return 0; + + } + + if (!intel_simplify_symbol (expr->op_symbol)) { + return 0; + } + + if (!intel_state.in_offset) { + + if (!intel_state.segment) { + intel_state.segment = expr->add_symbol; + } else { + + struct expr temp_expr = { 0 }; + + temp_expr.type = EXPR_TYPE_FULL_PTR; + temp_expr.add_symbol = expr->add_symbol; + temp_expr.op_symbol = intel_state.segment; + + intel_state.segment = make_expr_symbol (&temp_expr); + + } + + } + + intel_fold_symbol_into_expr (expr, expr->op_symbol); + break; + + case EXPR_TYPE_REGISTER: + + if ((ret = intel_process_register_expr (expr)) == 2) { + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + + } + + return ret; + + default: + default_: + + if (expr->add_symbol && !intel_simplify_symbol (expr->add_symbol)) { + return 0; + } + + if (expr->op_symbol && !intel_simplify_symbol (expr->op_symbol)) { + return 0; + } + + break; + + } + + if (expr->type == EXPR_TYPE_SYMBOL && !intel_state.in_offset) { + + struct section *section = symbol_get_section (expr->add_symbol); + + if (section != absolute_section && section != expr_section && section != reg_section) { + intel_state.is_mem |= 2 - !intel_state.in_bracket; + } + + } + + return 1; + +} + + +struct intel_type { + + const char *name; + + enum expr_type expr_type; + unsigned int size[2]; + +}; + +struct intel_operator { + + const char *name; + + enum expr_type expr_type; + unsigned int operands; + +}; + +static const struct intel_operator intel_operators[] = { + + { "and", EXPR_TYPE_BIT_AND, 2 }, + { "eq", EXPR_TYPE_EQUAL, 2 }, + { "ge", EXPR_TYPE_GREATER_EQUAL, 2 }, + { "gt", EXPR_TYPE_GREATER, 2 }, + { "le", EXPR_TYPE_LESSER_EQUAL, 2 }, + { "lt", EXPR_TYPE_LESSER, 2 }, + { "mod", EXPR_TYPE_MODULUS, 2 }, + { "ne", EXPR_TYPE_NOT_EQUAL, 2 }, + { "not", EXPR_TYPE_BIT_NOT, 1 }, + { "offset", EXPR_TYPE_OFFSET, 1 }, + { "or", EXPR_TYPE_BIT_INCLUSIVE_OR, 2 }, + { "shl", EXPR_TYPE_LEFT_SHIFT, 2 }, + { "shr", 
EXPR_TYPE_RIGHT_SHIFT, 2 }, + { "short", EXPR_TYPE_SHORT, 1 }, + { "xor", EXPR_TYPE_BIT_EXCLUSIVE_OR, 2 }, + + { 0, EXPR_TYPE_INVALID, 0 } + +}; + +#define INTEL_TYPE(name, size) { #name, EXPR_TYPE_##name##_PTR, { size, size } } + +static const struct intel_type intel_types[] = { + + INTEL_TYPE (BYTE, 1), + INTEL_TYPE (WORD, 2), + INTEL_TYPE (DWORD, 4), + INTEL_TYPE (FWORD, 6), + + { "near", EXPR_TYPE_NEAR_PTR, { 0xFF02, 0xFF04 } }, + { "far", EXPR_TYPE_FAR_PTR, { 0xFF05, 0xFF06 } }, + + { 0, EXPR_TYPE_INVALID, { 0, 0 } } + +}; + +#undef INTEL_TYPE + +static int intel_parse_name (struct expr *expr, char *name) { + + int i; + + if (strcmp (name, "$") == 0) { + + current_location (expr); + return 1; + + } + + for (i = 0; intel_types[i].name; i++) { + + if (xstrcasecmp (name, intel_types[i].name) == 0) { + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_symbol = 0; + expr->op_symbol = 0; + + expr->add_number = intel_types[i].size[0]; + return 1; + + } + + } + + return 0; + +} + +static void swap_2_operands (unsigned int op1, unsigned int op2) { + + int temp_type; + + struct reg_entry *temp_reg; + struct expr *temp_expr; + + temp_type = instruction.types[op1]; + instruction.types[op1] = instruction.types[op2]; + instruction.types[op2] = temp_type; + + temp_reg = instruction.regs[op1]; + instruction.regs[op1] = instruction.regs[op2]; + instruction.regs[op2] = temp_reg; + + temp_expr = instruction.disps[op1]; + instruction.disps[op1] = instruction.disps[op2]; + instruction.disps[op2] = temp_expr; + + temp_expr = instruction.imms[op1]; + instruction.imms[op1] = instruction.imms[op2]; + instruction.imms[op2] = temp_expr; + +} + +static void swap_operands (void) { + + swap_2_operands (0, instruction.operands - 1); + + if (instruction.mem_operands == 2) { + + struct reg_entry *seg = instruction.segments[0]; + instruction.segments[0] = instruction.segments[1]; + instruction.segments[1] = seg; + + } + +} + +static int fits_in_signed_byte (signed long number) { + return ((number 
>= -128) && (number <= 127)); +} + +static int fits_in_unsigned_byte (signed long number) { + return ((number & 0xff) == number); +} + +static int fits_in_signed_word (signed long number) { + return ((number >= -32768) && (number <= 32767)); +} + +static int fits_in_unsigned_word (signed long number) { + return ((number & 0xffff) == number); +} + +static unsigned int smallest_imm_type (long number) { + + if (fits_in_signed_byte (number)) { + return (IMM8S | IMM8 | IMM16 | IMM32); + } + + if (fits_in_unsigned_byte (number)) { + return (IMM8 | IMM16 | IMM32); + } + + if (fits_in_signed_word (number) || fits_in_unsigned_word (number)) { + return (IMM16 | IMM32); + } + + return IMM32; + +} + +static void optimize_size_of_disps (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & DISP) { + + if (instruction.disps[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long disp = instruction.disps[operand]->add_number; + + if (instruction.types[operand] & DISP32) { + + disp &= 0xffffffff; + disp = (disp ^ (1UL << 31)) - (1UL << 31); + + } + + if ((instruction.types[operand] & (DISP16 | DISP32)) && fits_in_signed_byte (disp)) { + instruction.types[operand] |= DISP8; + } + + } + + } + + } + +} + +static void optimize_size_of_imms (void) { + + char guessed_suffix = 0; + int operand; + + if (instruction.suffix) { + guessed_suffix = instruction.suffix; + } else if (instruction.reg_operands) { + + /** + * Guesses a suffix from the last register operand + * what is good enough for shortening immediates + * but the real suffix cannot be set yet. + * Example: mov $1234, %al + */ + for (operand = instruction.operands; --operand >= 0; ) { + + if (instruction.types[operand] & REG) { + + guessed_suffix = ((instruction.types[operand] & REG8) ? BYTE_SUFFIX : (instruction.types[operand] & REG16) ? 
WORD_SUFFIX : DWORD_SUFFIX); + break; + + } + + } + + } else if ((bits == 16) ^ (instruction.prefixes[DATA_PREFIX] != 0)) { + + /** + * Immediate shortening for 16 bit code. + * Example: .code16\n push $12341234 + */ + guessed_suffix = WORD_SUFFIX; + + } + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & IMM) { + + if (instruction.imms[operand]->type == EXPR_TYPE_CONSTANT) { + + /* If a suffix is given, it is allowed to shorten the immediate. */ + switch (guessed_suffix) { + + case BYTE_SUFFIX: + + instruction.types[operand] |= IMM8 | IMM8S | IMM16 | IMM32; + break; + + case WORD_SUFFIX: + + instruction.types[operand] |= IMM16 | IMM32; + break; + + case DWORD_SUFFIX: + + instruction.types[operand] |= IMM32; + break; + + } + + /* Wrap the constant to a signed 32-bit value before sizing it; the decision is based on this operand's own IMM32 flag, not operand 0's. */ + if (instruction.types[operand] & IMM32) { + + instruction.imms[operand]->add_number &= 0xffffffff; + instruction.imms[operand]->add_number = ((instruction.imms[operand]->add_number ^ (1UL << 31)) - (1UL << 31)); + + } + + instruction.types[operand] |= smallest_imm_type (instruction.imms[operand]->add_number); + + } + + } + + } + +} + +#define MATCH(overlap, operand_type) (((overlap) & ~JUMP_ABSOLUTE) && (((operand_type) & (BASE_INDEX | JUMP_ABSOLUTE)) == ((overlap) & (BASE_INDEX | JUMP_ABSOLUTE)))) + +static int match_template (void) { + + unsigned int found_reverse_match = 0, suffix_check = 0; + struct template *template; + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + suffix_check = NO_BSUF; + break; + + case WORD_SUFFIX: + + suffix_check = NO_WSUF; + break; + + case SHORT_SUFFIX: + + suffix_check = NO_SSUF; + break; + + case DWORD_SUFFIX: + + suffix_check = NO_LSUF; + break; + + case QWORD_SUFFIX: + + suffix_check = NO_QSUF; + break; + + case INTEL_SUFFIX: + + suffix_check = NO_INTELSUF; + break; + + } + + for (template = current_templates->start; template < current_templates->end; template++) { + + unsigned int operand_type_overlap0, operand_type_overlap1, operand_type_overlap2; + 
+ if (instruction.operands != template->operands) { + continue; + } + + if (template->cpu_flags && (template->cpu_flags & cpu_arch_flags) == 0) { + continue; + } + + if (template->opcode_modifier & suffix_check) { + continue; + } + + if (instruction.suffix == DWORD_SUFFIX && !(cpu_arch_flags & CPU_386) && !(template->opcode_modifier & IGNORE_SIZE)) { + continue; + } + + if (instruction.operands == 0) { + break; + } + + operand_type_overlap0 = instruction.types[0] & template->operand_types[0]; + + switch (template->operands) { + + case 1: + + if (!MATCH (operand_type_overlap0, instruction.types[0])) { + continue; + } + + if (operand_type_overlap0 == 0) { + continue; + } + + break; + + case 2: + case 3: + + operand_type_overlap1 = instruction.types[1] & template->operand_types[1]; + + if (!MATCH (operand_type_overlap0, instruction.types[0]) || !MATCH (operand_type_overlap1, instruction.types[1])) { + + if ((template->opcode_modifier & D) == 0) { + continue; + } + + operand_type_overlap0 = instruction.types[0] & template->operand_types[1]; + operand_type_overlap1 = instruction.types[1] & template->operand_types[0]; + + if (!MATCH (operand_type_overlap0, instruction.types[0]) || !MATCH (operand_type_overlap1, instruction.types[1])) { + continue; + } + + found_reverse_match = template->opcode_modifier & D; + + } else if (instruction.operands == 3) { + + operand_type_overlap2 = instruction.types[2] & template->operand_types[2]; + + if (!MATCH (operand_type_overlap2, instruction.types[2])) { + continue; + } + + } + + break; + + } + + break; + + } + + if (template == current_templates->end) { + + /* No match was found. 
*/ + report_at (get_filename (), get_line_number (), REPORT_ERROR, "operands invalid for '%s'", current_templates->name); + return 1; + + } + + instruction.template = *template; + + if (state->model < 7) { + + if (template->base_opcode == 0xC3 && xstrcasecmp (template->name, "retn") && instruction.operands == 0 && state->model >= 4 && state->procs.length > 0) { + instruction.template.base_opcode = 0xCB; + } + + if (template->base_opcode == 0xC2 && instruction.operands == 1 && state->model >= 4 && state->procs.length > 0) { + instruction.template.base_opcode = 0xCA; + } + + } + + if (found_reverse_match) { + + instruction.template.base_opcode |= found_reverse_match; + + instruction.template.operand_types[0] = template->operand_types[1]; + instruction.template.operand_types[1] = template->operand_types[0]; + + } + + return 0; + +} + +static int check_byte_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if (instruction.types[op] & REG8) { + continue; + } + + if ((instruction.types[op] & WORD_REG) && (instruction.regs[op]->number < 4)) { + + if (!(instruction.template.operand_types[op] & PORT)) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op] - ((instruction.types[op] & REG16) ? 
8 : 16))->name, instruction.regs[op]->name, instruction.suffix); + } + + continue; + + } + + if (instruction.types[op] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + } + + return 0; + +} + +static int check_word_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG8) && (instruction.template.operand_types[op] & (REG16 | REG32 | ACC))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + if ((instruction.types[op] & REG32) && (instruction.template.operand_types[op] & (REG16 | ACC))) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op]-8)->name, instruction.regs[op]->name, instruction.suffix); + } + + } + + return 0; + +} + +static int check_dword_reg (void) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG8) && (instruction.template.operand_types[op] & (REG16 | REG32 | ACC))) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'%%%s' not allowed with '%s%c'.", instruction.regs[op]->name, instruction.template.name, instruction.suffix); + return 1; + + } + + if ((instruction.types[op] & REG16) && (instruction.template.operand_types[op] & (REG32 | ACC))) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "using '%%%s' instead of '%%%s' due to '%c' suffix", (instruction.regs[op]+8)->name, instruction.regs[op]->name, instruction.suffix); + } + + } + + return 0; + +} + +static unsigned int modrm_mode_from_disp_size (unsigned int type) { + return ((type & DISP8) ? 
1 : ((type & (DISP16 | DISP32)) ? 2 : 0)); +} + +static int process_suffix (void) { + + int is_movsx_or_movzx = 0; + + if (instruction.template.opcode_modifier & (SIZE16 | SIZE32)) { + + if (instruction.template.opcode_modifier & SIZE16) { + instruction.suffix = WORD_SUFFIX; + } else { + instruction.suffix = DWORD_SUFFIX; + } + + } else if (instruction.reg_operands && (instruction.operands > 1 || (instruction.types[0] & REG))) { + + int saved_operands = instruction.operands; + + is_movsx_or_movzx = ((instruction.template.base_opcode & 0xFF00) == 0x0F00 + && ((instruction.template.base_opcode & 0xFF) | 8) == 0xBE); + + /* For movsx/movzx only the source operand is considered for the ambiguity checking. + * The suffix is replaced to represent the destination later. */ + if (is_movsx_or_movzx && (instruction.template.opcode_modifier & W)) { + instruction.operands--; + } + + if (!instruction.suffix) { + + int op; + + for (op = instruction.operands; --op >= 0; ) { + + if ((instruction.types[op] & REG) && !(instruction.template.operand_types[op] & SHIFT_COUNT) && !(instruction.template.operand_types[op] & PORT)) { + + instruction.suffix = ((instruction.types[op] & REG8) ? BYTE_SUFFIX : (instruction.types[op] & REG16) ? WORD_SUFFIX : DWORD_SUFFIX); + break; + + } + + } + + /* When .att_syntax is used, movsx and movzx silently default to byte memory source. 
*/ + if (is_movsx_or_movzx && (instruction.template.opcode_modifier & W) && !instruction.suffix && !intel_syntax) { + instruction.suffix = BYTE_SUFFIX; + } + + } else { + + int ret; + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + ret = check_byte_reg (); + break; + + case WORD_SUFFIX: + + ret = check_word_reg (); + break; + + case DWORD_SUFFIX: + + ret = check_dword_reg (); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "process_suffix invalid case %i", instruction.suffix); + exit (EXIT_FAILURE); + + } + + if (ret) { return 1; } + + } + + /* Undoes the movsx/movzx change done above. */ + instruction.operands = saved_operands; + + } else if ((instruction.template.opcode_modifier & DEFAULT_SIZE) && !instruction.suffix) { + instruction.suffix = (bits == 32) ? DWORD_SUFFIX : WORD_SUFFIX; + } else if (!instruction.suffix && ((instruction.template.operand_types[0] & JUMP_ABSOLUTE) || (instruction.template.opcode_modifier & JUMPBYTE) || (instruction.template.opcode_modifier & JUMPINTERSEGMENT) /* lgdt, lidt, sgdt, sidt */ || ((instruction.template.base_opcode == 0x0F01 && instruction.template.extension_opcode <= 3)))) { + + if (bits == 32) { + + if (!(instruction.template.opcode_modifier & NO_LSUF)) { + instruction.suffix = DWORD_SUFFIX; + } + + } else { + + if (!(instruction.template.opcode_modifier & NO_WSUF)) { + instruction.suffix = WORD_SUFFIX; + } + + } + + } + + if (!instruction.suffix + && !(instruction.template.opcode_modifier & IGNORE_SIZE) + && !(instruction.template.opcode_modifier & DEFAULT_SIZE) + /* Explicit data size prefix allows determining the size. */ + && !instruction.prefixes[DATA_PREFIX] + /* fldenv and similar instructions do not require a suffix. 
*/ + && (instruction.template.opcode_modifier & NO_SSUF)) { + + int suffixes = !(instruction.template.opcode_modifier & NO_BSUF); + + if (!(instruction.template.opcode_modifier & NO_WSUF)) { + suffixes |= 1 << 1; + } + + if (!(instruction.template.opcode_modifier & NO_SSUF)) { + suffixes |= 1 << 2; + } + + if (!(instruction.template.opcode_modifier & NO_LSUF)) { + suffixes |= 1 << 3; + } + + if (!(instruction.template.opcode_modifier & NO_INTELSUF)) { + suffixes |= 1 << 4; + } + + if (suffixes & (suffixes - 1)) { + + if (intel_syntax) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "ambiguous operand size for '%s'", instruction.template.name); + return 1; + + } + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "%s, using default for '%s'", intel_syntax + ? "ambiguous operand size" : "no instruction mnemonic suffix given and no register operands", + instruction.template.name); + + if (is_movsx_or_movzx) { + /* Handled below. */ + } else if (bits == 16) { + instruction.suffix = WORD_SUFFIX; + } else if (!(instruction.template.opcode_modifier & NO_LSUF)) { + instruction.suffix = DWORD_SUFFIX; + } else { + instruction.suffix = QWORD_SUFFIX; + } + + } + + } + + if (is_movsx_or_movzx) { + + /* The W modifier applies to the source memory or register, not to the destination register. */ + if ((instruction.template.opcode_modifier & W) && instruction.suffix && instruction.suffix != BYTE_SUFFIX) { + instruction.template.base_opcode |= 1; + } + + /* Changes the suffix to represent the destination and turns off the W modifier as it was already used above. 
*/ + if ((instruction.template.opcode_modifier & W) || !instruction.suffix) { + + if (instruction.types[1] & REG16) { + instruction.suffix = WORD_SUFFIX; + } else { + instruction.suffix = DWORD_SUFFIX; + } + + instruction.template.opcode_modifier &= ~W; + + } + + } + + switch (instruction.suffix) { + + case DWORD_SUFFIX: + case WORD_SUFFIX: + case QWORD_SUFFIX: + + /* Selects word/dword operation. */ + if (instruction.template.opcode_modifier & W) { + + if (instruction.template.opcode_modifier & SHORT_FORM) { + instruction.template.base_opcode |= 8; + } else { + instruction.template.base_opcode |= 1; + } + + } + + /* fall through. */ + + case SHORT_SUFFIX: + + if (instruction.suffix != QWORD_SUFFIX + && !(instruction.template.opcode_modifier & IGNORE_SIZE) + && ((instruction.suffix == DWORD_SUFFIX) == (bits == 16))) { + + unsigned int prefix = DATA_PREFIX_OPCODE; + + if (instruction.template.opcode_modifier & JUMPBYTE) { + prefix = ADDR_PREFIX_OPCODE; + } + + if (!add_prefix (prefix)) { + return 1; + } + + } + + break; + + case 0: + + /* Selects word/dword operation based on explicit data size prefix + * if no suitable register are present. 
*/ + if ((instruction.template.opcode_modifier & W) + && instruction.prefixes[DATA_PREFIX] + && (!instruction.reg_operands + || (instruction.reg_operands == 1 + && !(instruction.template.operand_types[0] & SHIFT_COUNT) + && !(instruction.template.operand_types[0] & PORT) + && !(instruction.template.operand_types[1] & PORT)))) { + + instruction.template.base_opcode |= 1; + + } + + break; + + } + + return 0; + +} + +static int finalize_imms (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + unsigned int overlap = instruction.types[operand] & instruction.template.operand_types[operand]; + + if ((overlap & IMM) && (overlap != IMM8) && (overlap != IMM8S) && (overlap != IMM16) && (overlap != IMM32)) { + + if (instruction.suffix) { + + switch (instruction.suffix) { + + case BYTE_SUFFIX: + + overlap &= IMM8 | IMM8S; + break; + + case WORD_SUFFIX: + + overlap &= IMM16; + break; + + case DWORD_SUFFIX: + + overlap &= IMM32; + break; + + } + + } else if (overlap == (IMM16 | IMM32)) { + + if ((bits == 16) ^ (instruction.prefixes[DATA_PREFIX] != 0)) { + overlap = IMM16; + } else { + overlap = IMM32; + } + + } else if (instruction.prefixes[DATA_PREFIX]) { + overlap &= (bits != 16) ? IMM16 : IMM32; + } + + if ((overlap != IMM8) && (overlap != IMM8S) && (overlap != IMM16) && (overlap != IMM32)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no instruction suffix given; cannot determine immediate size"); + return 1; + + } + + } + + instruction.types[operand] = overlap; + + } + + return 0; + +} + +static const unsigned char segment_prefixes[] = { + + ES_PREFIX_OPCODE, + CS_PREFIX_OPCODE, + SS_PREFIX_OPCODE, + DS_PREFIX_OPCODE, + FS_PREFIX_OPCODE, + GS_PREFIX_OPCODE + +}; + +static int process_operands (void) { + + if (instruction.template.opcode_modifier & REG_DUPLICATION) { + + unsigned int first_reg_operand = (instruction.types[0] & REG) ? 
0 : 1; + + instruction.regs[first_reg_operand + 1] = instruction.regs[first_reg_operand]; + instruction.types[first_reg_operand + 1] = instruction.types[first_reg_operand]; + instruction.reg_operands = 2; + + } + + if (instruction.template.opcode_modifier & SHORT_FORM) { + + int operand = (instruction.types[0] & REG) ? 0 : 1; + instruction.template.base_opcode |= instruction.regs[operand]->number; + + } + + if (instruction.template.opcode_modifier & MODRM) { + + if (instruction.reg_operands == 2) { + + unsigned int source, dest; + + source = (instruction.types[0] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) ? 0 : 1; + dest = source + 1; + + instruction.modrm.mode = 3; + + if ((instruction.template.operand_types[dest] & ANY_MEM) == 0) { + + instruction.modrm.regmem = instruction.regs[source]->number; + instruction.modrm.reg = instruction.regs[dest]->number; + + } else { + + instruction.modrm.regmem = instruction.regs[dest]->number; + instruction.modrm.reg = instruction.regs[source]->number; + + } + + } else { + + if (instruction.mem_operands) { + + int fake_zero_displacement = 0; + int operand = 0; + + if (instruction.types[0] & ANY_MEM) { + ; + } else if (instruction.types[1] & ANY_MEM) { + operand = 1; + } else { + operand = 2; + } + + if (instruction.base_reg == 0) { + + instruction.modrm.mode = 0; + + if (instruction.disp_operands == 0) { + fake_zero_displacement = 1; + } + + if (instruction.index_reg == 0) { + + if ((bits == 16) ^ (instruction.prefixes[ADDR_PREFIX] != 0)) { + + instruction.modrm.regmem = SIB_BASE_NO_BASE_REGISTER_16; + instruction.types[operand] = DISP16; + + } else { + + instruction.modrm.regmem = SIB_BASE_NO_BASE_REGISTER; + instruction.types[operand] = DISP32; + + } + + } else { + + instruction.sib.base = SIB_BASE_NO_BASE_REGISTER; + instruction.sib.index = instruction.index_reg->number; + instruction.sib.scale = instruction.log2_scale_factor; + + instruction.modrm.regmem = MODRM_REGMEM_TWO_BYTE_ADDRESSING; + + 
instruction.types[operand] &= ~DISP; + instruction.types[operand] |= DISP32; + + } + + } else if (instruction.base_reg->type & REG16) { + + switch (instruction.base_reg->number) { + + case 3: + + if (instruction.index_reg == 0) { + instruction.modrm.regmem = 7; + } else { + instruction.modrm.regmem = (instruction.index_reg->number - 6); + } + + break; + + case 5: + + if (instruction.index_reg == 0) { + + instruction.modrm.regmem = 6; + + if ((instruction.types[operand] & DISP) == 0) { + + fake_zero_displacement = 1; + instruction.types[operand] |= DISP8; + + } + + } else { + instruction.modrm.regmem = (instruction.index_reg->number - 6 + 2); + } + + break; + + default: + + instruction.modrm.regmem = (instruction.base_reg->number - 6 + 4); + break; + + } + + instruction.modrm.mode = modrm_mode_from_disp_size (instruction.types[operand]); + + } else { + + if (bits == 16 && (instruction.types[operand] & BASE_INDEX)) { + add_prefix (ADDR_PREFIX_OPCODE); + } + + instruction.modrm.regmem = instruction.base_reg->number; + + instruction.sib.base = instruction.base_reg->number; + instruction.sib.scale = instruction.log2_scale_factor; + + if (instruction.base_reg->number == 5 && instruction.disp_operands == 0) { + + fake_zero_displacement = 1; + instruction.types[operand] |= DISP8; + + } + + if (instruction.index_reg) { + + instruction.sib.index = instruction.index_reg->number; + instruction.modrm.regmem = MODRM_REGMEM_TWO_BYTE_ADDRESSING; + + } else { + instruction.sib.index = SIB_INDEX_NO_INDEX_REGISTER; + } + + instruction.modrm.mode = modrm_mode_from_disp_size (instruction.types[operand]); + + } + + if (fake_zero_displacement) { + + struct expr *expr = &operand_exprs[operand_exprs_count++]; + instruction.disps[operand] = expr; + + expr->type = EXPR_TYPE_CONSTANT; + expr->add_number = 0; + expr->add_symbol = 0; + expr->op_symbol = 0; + + } + + } + + if (instruction.reg_operands) { + + int operand = 0; + + if (instruction.types[0] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | 
DEBUG | TEST)) { + ; + } else if (instruction.types[1] & (REG | SEGMENT1 | SEGMENT2 | CONTROL | DEBUG | TEST)) { + operand = 1; + } else { + operand = 2; + } + + if (instruction.template.extension_opcode != NONE) { + instruction.modrm.regmem = instruction.regs[operand]->number; + } else { + instruction.modrm.reg = instruction.regs[operand]->number; + } + + if (instruction.mem_operands == 0) { + instruction.modrm.mode = 3; + } + + } + + if (instruction.template.extension_opcode != NONE) { + instruction.modrm.reg = instruction.template.extension_opcode; + } + + } + + } + + if (instruction.template.opcode_modifier & SEGSHORTFORM) { + + if ((instruction.template.base_opcode == POP_SEGMENT_SHORT) && (instruction.regs[0]->number == 1)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "'pop %%cs' is not valid"); + return 1; + + } + + instruction.template.base_opcode |= instruction.regs[0]->number << 3; + + } + + { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.segments[operand]) { + + add_prefix (segment_prefixes[instruction.segments[operand]->number]); + break; + + } + + } + + } + + return 0; + +} + +static void output_jump (void) { + + struct symbol *symbol; + unsigned int relax_subtype; + + unsigned long offset; + unsigned long opcode_offset_in_buf; + + unsigned int code16 = 0; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + if ((instruction.prefixes[SEGMENT_PREFIX] == CS_PREFIX_OPCODE) || (instruction.prefixes[SEGMENT_PREFIX] == DS_PREFIX_OPCODE)) { + + frag_append_1_char (instruction.prefixes[SEGMENT_PREFIX]); + instruction.prefix_count--; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + 
frag_alloc_space (2 + 4); + + opcode_offset_in_buf = current_frag->fixed_size; + frag_append_1_char (instruction.template.base_opcode); + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /* "jmp 5" is converted to "temp_label: jmp 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. + */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + symbol = instruction.disps[0]->add_symbol; + offset = instruction.disps[0]->add_number; + + if (!instruction.force_short_jump) { + + if (instruction.template.base_opcode == PC_RELATIVE_JUMP) { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_SHORT_JUMP); + } else if (cpu_arch_flags & CPU_386) { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_SHORT_JUMP); + } else { + relax_subtype = ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_SHORT_JUMP); + } + + relax_subtype |= code16; + frag_set_as_variant (RELAX_TYPE_MACHINE_DEPENDENT, relax_subtype, symbol, offset, opcode_offset_in_buf); + + } else { + frag_set_as_variant (RELAX_TYPE_MACHINE_DEPENDENT, ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_FORCED_SHORT_JUMP, RELAX_SUBTYPE_SHORT_JUMP), symbol, offset, opcode_offset_in_buf); + } + +} + +void machine_dependent_number_to_chars (unsigned char *p, unsigned int number, unsigned int size); + +static void output_call_or_jumpbyte (void) { + + struct fixup *fixup = 0; + int size; + + if (instruction.template.opcode_modifier & JUMPBYTE) { + + size = 1; + + if (instruction.prefixes[ADDR_PREFIX]) { + + frag_append_1_char (instruction.prefixes[ADDR_PREFIX]); + instruction.prefix_count--; + + } + + if ((instruction.prefixes[SEGMENT_PREFIX] == CS_PREFIX_OPCODE) || (instruction.prefixes[SEGMENT_PREFIX] == DS_PREFIX_OPCODE)) { + + frag_append_1_char 
(instruction.prefixes[SEGMENT_PREFIX]); + instruction.prefix_count--; + + } + + } else { + + unsigned int code16 = 0; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + size = code16 ? 2 : 4; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + if (state->model < 7 && state->procs.length > 0) { + + if (instruction.template.base_opcode == 0xE8 && size == 2 && state->model >= 4) { + + instruction.template.base_opcode = 0x9A; + size += 2; + + } + + } + + frag_append_1_char (instruction.template.base_opcode); + + if (instruction.template.opcode_modifier & JUMPBYTE || state->model < 4) { + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /** + *"call 5" is converted to "temp_label: call 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. + */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + fixup = fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[0], 1, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else if (state->procs.length == 0 || size == 2 || state->model == 7) { + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + + /* "call 5" is converted to "temp_label: call 1 + temp_label + 5". + * The "1" is the size of the opcode + * and it is included by calling symbol_temp_new_now () + * after the opcode is written above. 
+ */ + instruction.disps[0]->type = EXPR_TYPE_SYMBOL; + instruction.disps[0]->add_symbol = symbol_temp_new_now (); + + } + + fixup = fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[0], 1, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else { + + unsigned char *p = frag_increase_fixed_size (size); + + if (instruction.disps[0]->type == EXPR_TYPE_CONSTANT) { + machine_dependent_number_to_chars (p, instruction.disps[0]->add_number, size); + } else { + fixup = fixup_new_expr (current_frag, p - current_frag->buf, size, instruction.disps[0], 1, RELOC_TYPE_FAR_CALL); + } + + } + + if (fixup && size == 1) { + fixup->fixup_signed = 1; + } + +} + +static void output_intersegment_jump (void) { + + unsigned int code16 = 0, size; + unsigned char *p; + + if (bits == 16) { + code16 = RELAX_SUBTYPE_CODE16_JUMP; + } + + if (instruction.prefixes[DATA_PREFIX]) { + + frag_append_1_char (instruction.prefixes[DATA_PREFIX]); + instruction.prefix_count--; + + code16 ^= RELAX_SUBTYPE_CODE16_JUMP; + + } + + if (instruction.prefix_count) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "skipping prefixes on this instruction"); + } + + size = code16 ? 2 : 4; + frag_append_1_char (instruction.template.base_opcode); + + /* size for the offset, 2 for the segment. 
*/ + p = frag_increase_fixed_size (size + 2); + + if (instruction.imms[1]->type == EXPR_TYPE_CONSTANT) { + + if ((size == 2) && !fits_in_unsigned_word (instruction.imms[1]->add_number) && !fits_in_signed_word (instruction.imms[1]->add_number)) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "16-bit jump out of range."); + return; + + } + + machine_dependent_number_to_chars (p, instruction.imms[1]->add_number, size); + + } else { + fixup_new_expr (current_frag, p - current_frag->buf, size, instruction.imms[1], 0, RELOC_TYPE_DEFAULT); + } + + if (instruction.imms[0]->type != EXPR_TYPE_CONSTANT) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "cannot handle non absolute segment in '%s'", instruction.template.name); + } + + machine_dependent_number_to_chars (p + size, instruction.imms[0]->add_number, size); + +} + +static long convert_number_to_size (unsigned long value, int size) { + + unsigned long mask; + + switch (size) { + + case 1: + + mask = 0xff; + break; + + case 2: + + mask = 0xffff; + break; + + case 4: + + mask = 0xffffffff; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "convert_number_to_size invalid case %i", size); + exit (EXIT_FAILURE); + + } + + if ((value & ~mask) && ((value & ~mask) != ~mask)) { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "%ld shortened to %ld", value, value & mask); + } + + value &= mask; + return value; + +} + +static int disp_size (unsigned int operand_type) { + + if (operand_type & DISP8) { + return 1; + } + + if (operand_type & DISP16) { + return 2; + } + + return 4; + +} + +static void output_disps (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & DISP) { + + int size = disp_size (instruction.types[operand]); + + if (instruction.disps[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long value = convert_number_to_size 
(instruction.disps[operand]->add_number, size); + machine_dependent_number_to_chars (frag_increase_fixed_size (size), value, size); + + } else { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.disps[operand], 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } + + } + + } + +} + +static int imm_size (unsigned int operand_type) { + + if (operand_type & (IMM8 | IMM8S)) { + return 1; + } + + if (operand_type & IMM16) { + return 2; + } + + return 4; + +} + +static void output_imms (void) { + + int operand; + + for (operand = 0; operand < instruction.operands; operand++) { + + if (instruction.types[operand] & IMM) { + + int size = imm_size (instruction.types[operand]); + + if (instruction.imms[operand]->type == EXPR_TYPE_CONSTANT) { + + unsigned long value = convert_number_to_size (instruction.imms[operand]->add_number, size); + machine_dependent_number_to_chars (frag_increase_fixed_size (size), value, size); + + } else { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, instruction.imms[operand], 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } + + } + + } + +} + + +enum expr_type machine_dependent_parse_operator (char **pp, char *name, char *original_saved_c, unsigned int operands) { + + unsigned int i; + + if (!name) { + + if (operands != 2) { + return EXPR_TYPE_INVALID; + } + + switch (**pp) { + + case ':': + + (*pp)++; + return EXPR_TYPE_FULL_PTR; + + case '[': + + (*pp)++; + return EXPR_TYPE_INDEX; + + } + + return EXPR_TYPE_INVALID; + + } + + for (i = 0; intel_types[i].name; i++) { + + if (xstrcasecmp (name, intel_types[i].name) == 0) { + break; + } + + } + + if (intel_types[i].name && *original_saved_c == ' ') { + + char *second_name, ch; + (*pp)++; + + second_name = *pp; + ch = get_symbol_name_end (pp); + + if (xstrcasecmp (second_name, "ptr") == 0) { + + second_name[-1] = *original_saved_c; + *original_saved_c = ch; + + return intel_types[i].expr_type; + + } + + **pp = ch; + *pp = 
second_name - 1; + + return EXPR_TYPE_ABSENT; + + } + + for (i = 0; intel_operators[i].name; i++) { + + if (xstrcasecmp (name, intel_operators[i].name) == 0) { + + if (operands != intel_operators[i].operands) { + return EXPR_TYPE_INVALID; + } + + return intel_operators[i].expr_type; + + } + + } + + return EXPR_TYPE_ABSENT; + +} + +extern struct section *current_section; + +struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr) { + + struct section *ret_section; + int ret; + + memset (&intel_state, 0, sizeof (intel_state)); + intel_state.operand_modifier = EXPR_TYPE_ABSENT; + + instruction.operands = -1; + intel_syntax = -1; + + ret_section = expression_read_into (start, pp, expr); + ret = intel_simplify_expr (expr); + + intel_syntax = 1; + + if (!ret) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "bad machine-dependent expression"); + expr->type = EXPR_TYPE_INVALID; + + } + + return ret_section; + +} + +int machine_dependent_force_relocation_local (struct fixup *fixup) { + return fixup->pcrel == 0; +} + +int machine_dependent_need_index_operator (void) { + return intel_syntax < 0; +} + +int machine_dependent_parse_name (char **pp, struct expr *expr, char *name, char *original_saved_c) { + + struct reg_entry *reg; + char *orig_end; + + orig_end = *pp; + **pp = *original_saved_c; + + reg = parse_register (name, pp); + + if (reg && orig_end <= *pp) { + + *original_saved_c = **pp; + **pp = '\0'; + + if (reg != &bad_register) { + + expr->type = EXPR_TYPE_REGISTER; + expr->add_number = reg - reg_table; + + } else { + expr->type = EXPR_TYPE_INVALID; + } + + return 1; + + } + + *pp = orig_end; + **pp = '\0'; + + return (intel_syntax ? 
intel_parse_name (expr, name) : 0); + +} + +signed long machine_dependent_estimate_size_before_relax (struct frag *frag, struct section *section) { + + if (symbol_get_section (frag->symbol) != section) { + + int size = (frag->relax_subtype & RELAX_SUBTYPE_CODE16_JUMP) ? 2 : 4; + + unsigned char *opcode_pos = frag->buf + frag->opcode_offset_in_buf; + unsigned long old_frag_fixed_size = frag->fixed_size; + + switch (TYPE_FROM_RELAX_SUBTYPE (frag->relax_subtype)) { + + case RELAX_SUBTYPE_UNCONDITIONAL_JUMP: + + *opcode_pos = 0xE9; + + fixup_new (frag, frag->fixed_size, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size; + + break; + + case RELAX_SUBTYPE_CONDITIONAL_JUMP86: + + if (size == 2) { + + /* Negates the condition and jumps past unconditional jump. */ + opcode_pos[0] ^= 1; + opcode_pos[1] = 3; + + /* Inserts the unconditional jump. */ + opcode_pos[2] = 0xE9; + + frag->fixed_size += 4; + fixup_new (frag, old_frag_fixed_size + 2, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + + break; + + } + + /* fall through. 
*/ + + case RELAX_SUBTYPE_CONDITIONAL_JUMP: + + opcode_pos[1] = opcode_pos[0] + 0x10; + opcode_pos[0] = TWOBYTE_OPCODE; + + fixup_new (frag, frag->fixed_size + 1, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size + 1; + + break; + + case RELAX_SUBTYPE_FORCED_SHORT_JUMP: + + size = 1; + + fixup_new (frag, frag->fixed_size, size, frag->symbol, frag->offset, 1, RELOC_TYPE_DEFAULT); + frag->fixed_size += size; + + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, + "%s: %lu: machine_dependent_estimate_size_before_relax invalid case", frag->filename, frag->line_number); + exit (EXIT_FAILURE); + + } + + frag->relax_type = RELAX_TYPE_NONE_NEEDED; + return frag->fixed_size - old_frag_fixed_size; + + } + + return relax_table[frag->relax_subtype].size_of_variable_part; + +} + +signed long machine_dependent_pcrel_from (struct fixup *fixup) { + return (fixup->size + fixup->where + fixup->frag->address); +} + +signed long machine_dependent_relax_frag (struct frag *frag, struct section *section, signed long change) { + + unsigned long target; + + unsigned int new_subtype; + signed long aim, growth; + + target = frag->offset; + + if (frag->symbol) { + + target += symbol_get_value (frag->symbol); + + if ((section == symbol_get_section (frag->symbol)) && (frag->relax_marker != frag->symbol->frag->relax_marker)) { + target += change; + } + + } + + aim = target - frag->address - frag->fixed_size; + + if (aim > 0) { + + for (new_subtype = frag->relax_subtype; relax_table[new_subtype].next_subtype; new_subtype = relax_table[new_subtype].next_subtype) { + + if (aim <= relax_table[new_subtype].forward_reach) { + break; + } + + } + + } else if (aim < 0) { + + for (new_subtype = frag->relax_subtype; relax_table[new_subtype].next_subtype; new_subtype = relax_table[new_subtype].next_subtype) { + + if (aim >= relax_table[new_subtype].backward_reach) { + break; + } + + } + + } else { + return 0; + } + + growth = 
relax_table[new_subtype].size_of_variable_part; + growth -= relax_table[frag->relax_subtype].size_of_variable_part; + + if (growth) { frag->relax_subtype = new_subtype; } + return growth; + +} + +void machine_dependent_apply_fixup (struct fixup *fixup, unsigned long value) { + + unsigned char *p = fixup->where + fixup->frag->buf; + + if (!fixup->add_symbol) { + fixup->done = 1; + } + + if (fixup->reloc_type == RELOC_TYPE_FAR_CALL) { + + if (fixup->add_symbol == 0) { + + if ((long) value >= 65535) { + + value--; + + machine_dependent_number_to_chars (p, value % 16, 2); + machine_dependent_number_to_chars (p + 2, value / 16, 2); + + } else { + + value -= (fixup->where + fixup->frag->address); + value -= fixup->size; + + machine_dependent_number_to_chars (p - 1, 0x0E, 1); + machine_dependent_number_to_chars (p + 1, value + 1, 2); + + machine_dependent_number_to_chars (p, 0xE8, 1); + machine_dependent_number_to_chars (p + 3, 0x90, 1); + + } + + } else { + machine_dependent_number_to_chars (p, 0, fixup->size); + } + + } else { + machine_dependent_number_to_chars (p, value, fixup->size); + } + +} + +void machine_dependent_finish_frag (struct frag *frag) { + + unsigned char *opcode_pos; + + unsigned char *displacement_pos; + long displacement; + + int size; + unsigned long extension = 0; + + opcode_pos = frag->buf + frag->opcode_offset_in_buf; + + displacement_pos = opcode_pos + 1; + displacement = (symbol_get_value (frag->symbol) + frag->offset - frag->address - frag->fixed_size); + + if ((frag->relax_subtype & RELAX_SUBTYPE_LONG_JUMP) == 0) { + + displacement_pos = opcode_pos + 1; + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + if (RELAX_SUBTYPE_FORCED_SHORT_JUMP) { + + if (displacement > relax_table[frag->relax_subtype].forward_reach || displacement < relax_table[frag->relax_subtype].backward_reach) { + report_at (frag->filename, frag->line_number, REPORT_ERROR, "forced short jump out of range"); + } + + } + + } else { + + switch 
(frag->relax_subtype) { + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_UNCONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + opcode_pos[0] = 0xE9; + + displacement_pos = opcode_pos + 1; + break; + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG_JUMP): + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + opcode_pos[1] = opcode_pos[0] + 0x10; + opcode_pos[0] = TWOBYTE_OPCODE; + + displacement_pos = opcode_pos + 2; + break; + + case ENCODE_RELAX_SUBTYPE (RELAX_SUBTYPE_CONDITIONAL_JUMP86, RELAX_SUBTYPE_LONG16_JUMP): + + extension = relax_table[frag->relax_subtype].size_of_variable_part; + + /* Negates the condition and jumps past unconditional jump. */ + opcode_pos[0] ^= 1; + opcode_pos[1] = 3; + + /* Inserts the unconditional jump. */ + opcode_pos[2] = 0xE9; + + displacement_pos = opcode_pos + 3; + break; + + } + + } + + size = DISPLACEMENT_SIZE_FROM_RELAX_SUBSTATE (frag->relax_subtype); + displacement -= extension; + + machine_dependent_number_to_chars (displacement_pos, displacement, size); + frag->fixed_size += extension; + +} + + +void machine_dependent_assemble_line (char *start, char *line) { + + memset (&instruction, 0, sizeof (instruction)); + memset (operand_exprs, 0, sizeof (operand_exprs)); + + operand_exprs_count = 0; + line = parse_instruction (line); + + if (!line || parse_operands (start, &line)) { + return; + } + + /** + * All Intel instructions have reversed operands except "bound" and some other. + * "ljmp" and "lcall" with 2 immediate operands also do not have operands reversed. 
+ */ + if (intel_syntax && instruction.operands > 1 && strcmp (current_templates->name, "bound") && !((instruction.types[0] & IMM) && (instruction.types[1] & IMM))) { + swap_operands (); + } + + optimize_size_of_disps (); + optimize_size_of_imms (); + + if (match_template () || process_suffix () || finalize_imms ()) { + return; + } + + if (instruction.template.operand_types[0] & IMPLICIT_REGISTER) { + instruction.reg_operands--; + } + + if (instruction.template.operand_types[1] & IMPLICIT_REGISTER) { + instruction.reg_operands--; + } + + if (instruction.operands) { + + if (process_operands ()) { + return; + } + + } + + /* int $3 should be converted to the one byte INT3. */ + if (instruction.template.base_opcode == INT_OPCODE && instruction.imms[0]->add_number == 3) { + + instruction.template.base_opcode = INT3_OPCODE; + instruction.operands = 0; + + } + + if (instruction.template.opcode_modifier & JUMP) { + output_jump (); + } else if (instruction.template.opcode_modifier & (CALL | JUMPBYTE)) { + output_call_or_jumpbyte (); + } else if (instruction.template.opcode_modifier & JUMPINTERSEGMENT) { + output_intersegment_jump (); + } else { + + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (instruction.prefixes); i++) { + + if (instruction.prefixes[i]) { + frag_append_1_char (instruction.prefixes[i]); + } + + } + + if (instruction.template.base_opcode & 0xff00) { + frag_append_1_char ((instruction.template.base_opcode >> 8) & 0xff); + } + + frag_append_1_char (instruction.template.base_opcode & 0xff); + + if (instruction.template.opcode_modifier & MODRM) { + + frag_append_1_char (((instruction.modrm.regmem << 0) | (instruction.modrm.reg << 3) | (instruction.modrm.mode << 6))); + + if ((instruction.modrm.regmem == MODRM_REGMEM_TWO_BYTE_ADDRESSING) && (instruction.modrm.mode != 3) && !(instruction.base_reg && (instruction.base_reg->type & REG16))) { + frag_append_1_char (((instruction.sib.base << 0) | (instruction.sib.index << 3) | (instruction.sib.scale << 6))); + } + + 
/**
 * Stores the low 'size' bytes of 'number' at p[0] .. p[size - 1],
 * least significant byte first.
 *
 * The value is consumed eight bits at a time, so a 'size' larger than
 * sizeof (unsigned int) pads with zero bytes instead of shifting by a
 * count that is greater than or equal to the width of the type (which
 * is undefined behaviour in C).
 */
void machine_dependent_number_to_chars (unsigned char *p, unsigned int number, unsigned int size) {

    unsigned int i;
    
    for (i = 0; i < size; i++) {
    
        p[i] = (unsigned char) (number & 0xff);
        number >>= 8;
    
    }

}
(1LU << 4) /* 0x00000010 */ +#define CALL (1LU << 5) /* 0x00000020 */ + +#define IGNORE_SIZE (1LU << 9) /* 0x00000200 */ +#define DEFAULT_SIZE (1LU << 24) /* 0x01000000 */ +#define SEGSHORTFORM (1LU << 18) /* 0x00040000 */ + +#define JUMPINTERSEGMENT (1LU << 11) /* 0x00000800 */ +#define JUMPBYTE (1LU << 12) /* 0x00001000 */ + +#define SIZE16 (1LU << 13) /* 0x00002000 */ +#define SIZE32 (1LU << 14) /* 0x00004000 */ + +#define IS_PREFIX (1LU << 15) /* 0x00008000 */ +#define IS_STRING (1LU << 16) /* 0x00010000 */ + +#define REG_DUPLICATION (1LU << 17) /* 0x00020000 */ + + unsigned long operand_types[MAX_OPERANDS]; + +#define REG8 0x00000001 +#define REG16 0x00000002 +#define REG32 0x00000004 + +#define REG (REG8 | REG16 | REG32) +#define WORD_REG (REG16 | REG32) + +#define SEGMENT1 0x00000008 +#define SEGMENT2 0x00020000 +#define CONTROL 0x00000010 +#define DEBUG 0x00100000 +#define TEST 0x00200000 + +#define IMM8 0x00000020 +#define IMM8S 0x00000040 +#define IMM16 0x00000080 +#define IMM32 0x00000100 + +#define IMM (IMM8 | IMM8S | IMM16 | IMM32) +#define ENCODABLEIMM (IMM8 | IMM16 | IMM32) + +#define DISP8 0x00000200 +#define DISP16 0x00000400 +#define DISP32 0x00000800 + +#define DISP (DISP8 | DISP16 | DISP32) +#define BASE_INDEX 0x00001000 + +/** + * INV_MEM is for instruction with modrm where general register + * encoding is allowed only in modrm.regmem (control register move). 
+ */ +#define INV_MEM 0x00040000 +#define ANY_MEM (DISP8 | DISP16 | DISP32 | BASE_INDEX | INV_MEM) + +#define ACC 0x00002000 +#define PORT 0x00004000 +#define SHIFT_COUNT 0x00008000 +#define JUMP_ABSOLUTE 0x00010000 + +#define IMPLICIT_REGISTER (SHIFT_COUNT | ACC) + + unsigned int cpu_flags; + +#define CPU_8086 (1U << 0) +#define CPU_186 (1U << 1) +#define CPU_286 (1U << 2) +#define CPU_386 (1U << 3) +#define CPU_486 (1U << 4) +#define CPU_586 (1U << 5) +#define CPU_686 (1U << 6) + +#define CPU_8087 (1U << 7) +#define CPU_287 (1U << 8) +#define CPU_387 (1U << 9) +#define CPU_687 (1U << 10) + +#define CPU_CMOV (1U << 12) + +}; + +#define REG_FLAT_NUMBER (~0U) + +struct reg_entry { + + const char *name; + unsigned int type, number; + +}; + +#endif /* _INTEL_H */ diff --git a/kwd.c b/kwd.c new file mode 100644 index 0000000..73f1209 --- /dev/null +++ b/kwd.c @@ -0,0 +1,634 @@ +/****************************************************************************** + * @file kwd.c + *****************************************************************************/ +#include +#include +#include + +#include "as.h" +#include "expr.h" +#include "fixup.h" +#include "frag.h" +#include "hashtab.h" +#include "kwd.h" +#include "lex.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include "symbol.h" + +extern struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr); + +static struct hashtab hashtab_pseudo_ops = { 0 }; +static struct hashtab hashtab_data_pseudo_ops = { 0 }; + +static int read_character (const char *start, char **pp, unsigned long *val) { + + int ch, i; + + switch (ch = *((*pp)++)) { + + case '"': + + return 1; + + case '\0': + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "null character in string; '\"' inserted"); + + (*pp)--; /* Might be the end of line buffer. 
*/ + return 1; + + case '\n': + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "unterminated string; newline inserted"); + + set_line_number (get_line_number () + 1); + *val = ch; + + break; + + case '\\': + + switch (ch = *((*pp)++)) { + + case '0': case '1': + case '2': case '3': + case '4': case '5': + case '6': case '7': + + for (i = 0, *val = 0; isdigit (ch) && (i < 3); (ch = *((*pp)++)), i++) { + *val = *val * 8 + (ch - '0'); + } + + (*pp)--; + break; + + case 'x': case 'X': + + ch = *((*pp)++); + + for (i = 0, *val = 0; isxdigit (ch) && (i < 3); (ch = *((*pp)++)), i++) { + + if (isdigit (ch)) { + *val = *val * 16 + (ch - '0'); + } else if (isupper (ch)) { + *val = *val * 16 + ((ch = 'A') + 10); + } else { + *val = *val * 16 + ((ch = 'a') + 10); + } + + } + + (*pp)--; + break; + + case 'r': + + *val = 13; + break; + + case 'n': + + *val = 10; + break; + + case '\\': + case '"': + + *val = ch; + break; + + default: + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp - 1, "unknown escape sequence: '\\%c'", ch); + + *val = ch; + break; + + } + + break; + + default: + + *val = ch; + break; + + } + + return 0; + +} + +static void align_bytes (char *start, char **pp, int first_arg_is_bytes) { + + signed long alignment; + int fill_specified; + + signed long fill_value = 0, max_bytes_to_skip; + signed long i; + + alignment = get_result_of_absolute_expression (start, pp); + + if (first_arg_is_bytes) { + + /* Converts to log2. 
*/ + for (i = 0; (alignment & 1) == 0; alignment >>= 1, i++); + + if (alignment != 1) { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "alignment is not a power of 2"); + } + + alignment = i; + + } + + if (**pp != ',') { + + fill_specified = 0; + max_bytes_to_skip = 0; + + } else { + + *pp = skip_whitespace (*pp + 1); + + if (**pp == ',') { + + fill_specified = 0; + *pp = skip_whitespace (*pp + 1); + + } else { + + fill_value = get_result_of_absolute_expression (start, pp); + fill_specified = 1; + + } + + + if (**pp != ',') { + max_bytes_to_skip = 0; + } else { + + *pp = skip_whitespace (*pp + 1); + max_bytes_to_skip = get_result_of_absolute_expression (start, pp); + + } + + } + + if (fill_specified) { + frag_align (alignment, fill_value, max_bytes_to_skip); + } else { + + if (current_section == text_section) { + frag_align_code (alignment, max_bytes_to_skip); + } else { + frag_align (alignment, 0, max_bytes_to_skip); + } + + } + +} + +static void handle_constant (char *start, char **pp, int size) { + + struct expr expr, val; + + char *temp, *arg; + signed long repeat; + + while (1) { + + *pp = skip_whitespace (*pp); + + if (**pp == '"') { + + unsigned long val; + int i; + + (*pp)++; + + while (!read_character (start, pp, &val)) { + + for (i = 0; i < size; i++) { + frag_append_1_char ((val >> (8 * i)) & 0xff); + } + + } + + } else { + + machine_dependent_simplified_expression_read_into (start, pp, &expr); + + if (!is_end_of_line[(int) **pp]) { + + temp = (*pp = skip_whitespace (*pp)); + + if (is_name_beginner ((int) **pp)) { + + if (!(arg = symname (pp))) { + goto no_repeat; + } + + if (xstrcasecmp (arg, "dup")) { + + free (arg); + goto no_repeat; + + } + + *pp = skip_whitespace (*pp); + machine_dependent_simplified_expression_read_into (start, pp, &val); + + if (val.type != EXPR_TYPE_CONSTANT) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid value for dup"); + + ignore_rest_of_line (pp); + return; + + } + + if 
(val.add_number != 0 && current_section == bss_section) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "attempt to initialize memory in a nobits section; ignored"); + val.add_number = 0; + + } + + if (val.add_number > 0xff) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup value %lu truncated to %lu", val.add_number, val.add_number & 0xff); + val.add_number &= 0xff; + + } + + if (expr.type == EXPR_TYPE_CONSTANT) { + + repeat = expr.add_number; + + if (repeat == 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup repeat count is zero; ignored"); + goto next; + + } + + if (repeat < 0) { + + report_at (get_filename (), get_line_number (), REPORT_WARNING, "dup repeate count is negative; ignored"); + goto next; + + } + + memset (frag_increase_fixed_size (repeat), val.add_number, repeat); + + } else { + + struct symbol *expr_symbol = make_expr_symbol (&expr); + + unsigned char *p = frag_alloc_space (symbol_get_value (expr_symbol)); + *p = val.add_number; + + frag_set_as_variant (RELAX_TYPE_SPACE, 0, expr_symbol, 0, 0); + + } + + goto next; + + } + + no_repeat: + + *pp = temp; + + } + + if (expr.type == EXPR_TYPE_CONSTANT) { + + int i; + + for (i = 0; i < size; i++) { + frag_append_1_char ((expr.add_number >> (8 * i)) & 0xff); + } + + } else if (expr.type != EXPR_TYPE_INVALID) { + + fixup_new_expr (current_frag, current_frag->fixed_size, size, &expr, 0, RELOC_TYPE_DEFAULT); + frag_increase_fixed_size (size); + + } else { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "value is not a constant"); + return; + + } + + } + + next: + + *pp = skip_whitespace (*pp); + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + + +static void handler_align (char *start, char **pp) { + align_bytes (start, pp, 1); +} + +static void handler_bss (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (bss_section); + +} + +static void handler_byte (char *start, char **pp) { + 
handle_constant (start, pp, 1); +} + +static void handler_data (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (data_section); + +} + +static void handler_end (char *start, char **pp) { + + char *name, *caret = skip_whitespace (*pp); + + if ((name = symname (pp))) { + + if (!(state->end_symbol = symbol_find (name))) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "undefined symbol '%s'", name); + } else if (symbol_is_undefined (state->end_symbol) || state->end_symbol->scope == SYMBOL_SCOPE_EXTERN) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "undefined symbol '%s'", name); + } + + free (name); + + } + +} + +static void handler_global (char *start, char **pp) { + + struct symbol *symbol; + char *name, *caret; + + for (;;) { + + caret = (*pp = skip_whitespace (*pp)); + + if (!(name = symname (pp))) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "expected symbol name"); + + ignore_rest_of_line (pp); + return; + + } + + if ((symbol = symbol_find (name))) { + + if (symbol->scope == SYMBOL_SCOPE_EXTERN) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol->scope = SYMBOL_SCOPE_GLOBAL; + symbol_set_external (symbol); + + } + + } else { + + symbol = symbol_make (name); + symbol_add_to_chain (symbol); + + symbol->scope = SYMBOL_SCOPE_GLOBAL; + symbol_set_external (symbol); + + } + + *pp = skip_whitespace (*pp); + + if (**pp != ',') { + break; + } + + (*pp)++; + + } + +} + +static void handler_long (char *start, char **pp) { + handle_constant (start, pp, 4); +} + +static void handler_stack (char *start, char **pp) { + + struct section *curr_sect; + struct expr expr; + + machine_dependent_simplified_expression_read_into (start, pp, &expr); + + if (expr.type == EXPR_TYPE_CONSTANT) { + + curr_sect = current_section; + section_set (bss_section); 
+ + memset (frag_increase_fixed_size (expr.add_number), 0, expr.add_number); + section_set (curr_sect); + + } else { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "value is not a constant"); + return; + + } + +} + +static void handler_text (char *start, char **pp) { + + (void) start; + (void) pp; + + section_set (text_section); + +} + +static void handler_word (char *start, char **pp) { + handle_constant (start, pp, 2); +} + +static struct pseudo_op_entry pseudo_op_table[] = { + + { ".code", &handler_text }, + { ".bss", &handler_bss }, + { ".data", &handler_data }, + { ".data?", &handler_bss }, + { ".stack", &handler_stack }, + { ".text", &handler_text }, + + { "align", &handler_align }, + { "end", &handler_end }, + { "global", &handler_global }, + { "public", &handler_global }, + + { 0, 0 } + +}; + +static struct pseudo_op_entry data_pseudo_op_table[] = { + + { "db", &handler_byte }, + { "dd", &handler_long }, + { "dw", &handler_word }, + + { 0, 0 } + +}; + +void install_pseudo_op_table (struct pseudo_op_entry *table) { + + struct pseudo_op_entry *entry; + struct hashtab_name *key; + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_pseudo_ops, key, entry); + + } + +} + +void install_data_pseudo_op_table (struct pseudo_op_entry *table) { + + struct pseudo_op_entry *entry; + struct hashtab_name *key; + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, 
REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_pseudo_ops, key, entry); + + } + + for (entry = table; entry->name; entry++) { + + if (hashtab_get_key (&hashtab_data_pseudo_ops, entry->name)) { + + report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name); + continue; + + } + + if (!(key = hashtab_alloc_name (entry->name))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name); + continue; + + } + + hashtab_put (&hashtab_data_pseudo_ops, key, entry); + + } + +} + +struct pseudo_op_entry *find_poe (char *name) { + + struct hashtab_name *key; + struct pseudo_op_entry *entry; + + char *lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_pseudo_ops, lname))) { + + if ((entry = hashtab_get (&hashtab_pseudo_ops, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +struct pseudo_op_entry *find_data_poe (char *name) { + + struct hashtab_name *key; + struct pseudo_op_entry *entry; + + char *lname = to_lower (name); + + if ((key = hashtab_get_key (&hashtab_data_pseudo_ops, lname))) { + + if ((entry = hashtab_get (&hashtab_data_pseudo_ops, key))) { + + free (lname); + return entry; + + } + + } + + free (lname); + return 0; + +} + +void keywords_init (void) { + + install_pseudo_op_table (pseudo_op_table); + install_data_pseudo_op_table (data_pseudo_op_table); + +} diff --git a/kwd.h b/kwd.h new file mode 100644 index 0000000..70821ab --- /dev/null +++ b/kwd.h @@ -0,0 +1,19 @@ +/****************************************************************************** + * @file kwd.h + *****************************************************************************/ +#ifndef _KWD_H +#define _KWD_H + +struct pseudo_op_entry { + + const char *name; + void (*handler) (char *start, char **pp); + +}; + +struct pseudo_op_entry *find_poe (char *name); +struct pseudo_op_entry *find_data_poe (char *name); + +void 
install_pseudo_op_table (struct pseudo_op_entry *table); + +#endif /* _KWD_H */ diff --git a/lex.c b/lex.c new file mode 100644 index 0000000..7fa85a4 --- /dev/null +++ b/lex.c @@ -0,0 +1,35 @@ +/****************************************************************************** + * @file lex.c + *****************************************************************************/ +#include "lex.h" + +char is_end_of_line[256] = { 0 }; +char lex_table[256] = { 0 }; + +void lex_init (void) { + + int i; + + is_end_of_line[0] = 1; + is_end_of_line[10] = 1; + + lex_table[36] = LEX_NAME_START | LEX_NAME_PART; + lex_table[46] = LEX_NAME_START | LEX_NAME_PART; + + for (i = 48; i < 58; i++) { + lex_table[i] = LEX_NAME_PART; + } + + lex_table[63] = LEX_NAME_PART; + + for (i = 65; i < 91; i++) { + lex_table[i] = LEX_NAME_START | LEX_NAME_PART; + } + + lex_table[95] = LEX_NAME_START | LEX_NAME_PART; + + for (i = 97; i < 123; i++) { + lex_table[i] = LEX_NAME_START | LEX_NAME_PART; + } + +} diff --git a/lex.h b/lex.h new file mode 100644 index 0000000..f295e8b --- /dev/null +++ b/lex.h @@ -0,0 +1,18 @@ +/****************************************************************************** + * @file lex.h + *****************************************************************************/ +#ifndef _LEX_H +#define _LEX_H + +#define LEX_NAME_PART 0x0001 +#define LEX_NAME_START 0x0002 + +extern char is_end_of_line[]; +extern char lex_table[]; + +#define is_name_beginner(c) (lex_table[(c)] & LEX_NAME_START) +#define is_name_part(c) (lex_table[(c)] & LEX_NAME_PART) + +void lex_init (void); + +#endif /* _LEX_H */ diff --git a/lib.c b/lib.c new file mode 100644 index 0000000..5635078 --- /dev/null +++ b/lib.c @@ -0,0 +1,542 @@ +/****************************************************************************** + * @file lib.c + *****************************************************************************/ +#include +#include +#include + +#include "as.h" +#include "lex.h" +#include "lib.h" +#include "report.h" + 
/**
 * Tests whether the string at *str begins with 'val'.
 *
 * On a match *str is advanced past the prefix and 1 is returned;
 * otherwise *str is left untouched and 0 is returned.  An empty 'val'
 * always matches.
 */
static int _strstart (const char *val, const char **str) {

    const char *subject = *str;
    unsigned long i;
    
    for (i = 0; val[i] != '\0'; i++) {
    
        if (subject[i] != val[i]) {
            return 0;
        }
    
    }
    
    *str = subject + i;
    return 1;

}
/**
 * Compares at most __len characters of two strings, ignoring case.
 *
 * Returns -1, 0 or 1 when __s1 sorts before, equal to or after __s2.
 * The comparison stops as soon as __len characters have matched or both
 * strings end, so strings that only differ after the first __len
 * characters compare equal, and a __len of 0 always yields 0.
 */
int xstrncasecmp (const char *__s1, const char *__s2, unsigned long __len) {

    const unsigned char *p1 = (const unsigned char *) __s1;
    const unsigned char *p2 = (const unsigned char *) __s2;
    
    for (; __len > 0; __len--, p1++, p2++) {
    
        int c1 = tolower ((int) *p1);
        int c2 = tolower ((int) *p2);
        
        if (c1 != c2) {
            return (c1 < c2) ? (-1) : (1);
        }
        
        /* Equal and NUL means both strings ended together. */
        if (*p1 == '\0') {
            return (0);
        }
    
    }
    
    return (0);

}
"-I%.*s/", len, temp); + + } else { + + path = xmalloc (2 + (p - temp) + 1); + sprintf (path, "-I%.*s", len, temp); + + } + + list_append (&state->pplist, path); + + } + + temp = (p + 1); + + } while (*p != '\0'); + + free (in); + +} + +void ignore_rest_of_line (char **pp) { + + while (!is_end_of_line[(int) **pp]) { + + if ((*pp)++[0] == '\"') { + + while (**pp && **pp != '\"') { + + if ((*pp)++[0] == '\\' && **pp) { + (*pp)++; + } + + } + + } + + } + +} + +void parse_args (int argc, char **argv, int optind) { + + struct as_option *popt; + const char *optarg, *r; + + if (argc <= optind) { + + _print_usage (); + exit (EXIT_SUCCESS); + + } + + while (optind < argc) { + + r = argv[optind++]; + + if (r[0] != '-' || r[1] == '\0') { + + if (state->ifile) { + + report_at (program_name, 0, REPORT_ERROR, "more than one file passed as input"); + exit (EXIT_FAILURE); + + } + + state->ifile = xstrdup (r); + continue; + + } + + for (popt = opts; ; popt++) { + + const char *p1 = popt->name; + const char *r1 = r; + + if (!p1) { + + report_at (program_name, 0, REPORT_ERROR, "invalid option -- '%s'", r); + exit (EXIT_FAILURE); + + } + + if (!_strstart (p1, &r1)) { + continue; + } + + optarg = r1; + + if (popt->flgs & AS_OPTION_HAS_ARG) { + + if (*r1 == '\0') { + + if (optind >= argc) { + + report_at (program_name, 0, REPORT_ERROR, "argument to '%s' is missing", r); + exit (EXIT_FAILURE); + + } + + optarg = argv[optind++]; + + } + + } else if (*r1 != '\0') { + continue; + } + + break; + + } + + switch (popt->idx) { + + case AS_OPTION_DEFINE : { + + char *arg; + + if (!strchr (optarg, '=')) { + + arg = xmalloc (2 + strlen (optarg) + 3); + sprintf (arg, "-D%s=1", optarg); + + } else { + + arg = xmalloc (2 + strlen (optarg) + 1); + sprintf (arg, "-D%s", optarg); + + } + + list_append (&state->pplist, arg); + break; + + } + + case AS_OPTION_FORMAT: { + + if (xstrcasecmp (optarg, "bin") == 0) { + + state->format = AS_OUTPUT_BIN; + break; + + } + + if (xstrcasecmp (optarg, "obj") == 0) { 
+ + state->format = AS_OUTPUT_OBJ; + break; + + } + + report_at (program_name, 0, REPORT_ERROR, "unrecognised output format '%s'", optarg); + exit (EXIT_FAILURE); + + } + + case AS_OPTION_HELP: { + + _print_usage (); + exit (EXIT_SUCCESS); + + } + + case AS_OPTION_INCLUDE: { + + add_include_path (optarg); + break; + + } + + case AS_OPTION_LISTING: { + + if (state->lfile) { + + report_at (program_name, 0, REPORT_ERROR, "multiple listing files provided"); + exit (EXIT_FAILURE); + + } + + state->lfile = xstrdup (optarg); + break; + + } + + case AS_OPTION_OUTFILE: { + + if (state->ofile) { + + report_at (program_name, 0, REPORT_ERROR, "multiple output files provided"); + exit (EXIT_FAILURE); + + } + + state->ofile = xstrdup (optarg); + break; + + } + + case AS_OPTION_UNDEF: { + + char *arg = xmalloc (2 + strlen (optarg) + 1); + sprintf (arg, "-U%s", optarg); + + list_append (&state->pplist, arg); + break; + + } + + default: { + + report_at (program_name, 0, REPORT_ERROR, "unsupported option '%s'", r); + exit (EXIT_FAILURE); + + } + + } + + } + + if (!state->ofile) { state->ofile = "a.out"; } + +} + +void *xmalloc (unsigned long __size) { + + void *ptr = malloc (__size); + + if (!ptr && __size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (malloc)"); + exit (EXIT_FAILURE); + + } + + memset (ptr, 0, __size); + return ptr; + +} + +void *xrealloc (void *__ptr, unsigned long __size) { + + void *ptr = realloc (__ptr, __size); + + if (!ptr && __size) { + + report_at (program_name, 0, REPORT_ERROR, "memory full (realloc)"); + exit (EXIT_FAILURE); + + } + + return ptr; + +} + + +static const char *filename = 0; +static unsigned long line_number = 0; + +const char *get_filename (void) { + return filename; +} + +unsigned long get_line_number (void) { + return line_number; +} + +void get_filename_and_line_number (const char **__filename_p, unsigned long *__line_number_p) { + + *__filename_p = filename; + *__line_number_p = line_number; + +} + +void 
set_filename_and_line_number (const char *__filename, unsigned long __line_number) { + + filename = __filename; + line_number = __line_number; + +} + +void set_filename (const char *__filename) { + filename = __filename; +} + +void set_line_number (unsigned long __line_number) { + line_number = __line_number; +} diff --git a/lib.h b/lib.h new file mode 100644 index 0000000..72e9461 --- /dev/null +++ b/lib.h @@ -0,0 +1,40 @@ +/****************************************************************************** + * @file lib.h + *****************************************************************************/ +#ifndef _LIB_H +#define _LIB_H + +#define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0])) + +char get_symbol_name_end (char **pp); + +char *skip_whitespace (char *__p); +char *symname (char **pp); + +char *xstrdup (const char *__p); +char *xstrndup (const char *__p, unsigned long __len); + +int xstrcasecmp (const char *__s1, const char *__s2); +int xstrncasecmp (const char *__s1, const char *__s2, unsigned long __len); + +char *to_lower (const char *__p); +void ignore_rest_of_line (char **pp); + + +void add_include_path (const char *__p); +void parse_args (int argc, char **argv, int optind); + +void *xmalloc (unsigned long __size); +void *xrealloc (void *__ptr, unsigned long __size); + + +const char *get_filename (void); +unsigned long get_line_number (void); + +void set_filename (const char *__filename); +void set_line_number (unsigned long __line_number); + +void get_filename_and_line_number (const char **__filename_p, unsigned long *__line_number_p); +void set_filename_and_line_number (const char *__filename, unsigned long __line_number); + +#endif /* _LIB_H */ diff --git a/list.c b/list.c new file mode 100644 index 0000000..a7e475b --- /dev/null +++ b/list.c @@ -0,0 +1,39 @@ +/****************************************************************************** + * @file list.c + *****************************************************************************/ +#include 
/******************************************************************************
 * @file            list.h
 *****************************************************************************/
#ifndef     _LIST_H
#define     _LIST_H

/*
 * Node of a circular singly linked list.
 *
 * A list is referred to through a pointer to its most recently appended
 * node; that node's `next` points at the oldest node, so a single-element
 * list points at itself.  An empty list is a NULL pointer.
 */
struct list {

    struct list *next;      /* following node; wraps round to the oldest one */
    void *data;             /* caller-supplied payload, stored as given */

};

/*
 * Append `data` after the node *list and make *list refer to the new node.
 * Works on an empty (NULL) list as well.
 */
void list_append (struct list **list, void *data);

/* Count the nodes reachable from `list`; returns 0 for a NULL list. */
unsigned int nlist (struct list *list);

#endif      /* _LIST_H */
/* Return a freshly allocated, NUL-terminated copy of the `len` bytes at `text`. */
static char *copy_listing_segment (const char *text, unsigned long len) {

    char *copy = xmalloc (len + 1);
    
    if (len) {
        memcpy (copy, text, len);
    }
    
    copy[len] = '\0';
    return copy;

}

/**
 * Record a chunk of raw source text in the listing.
 *
 * The chunk is split at every '\n'; each piece (without its newline) becomes
 * one listing entry, and line_number is incremented after every newline so
 * that the pieces are numbered consecutively.  Text after the last newline,
 * or the whole chunk when it contains none, is recorded as a final entry.
 * A chunk that ends in '\n' produces no trailing empty entry.
 *
 * The final piece used to be terminated at index (length + 1), one byte past
 * the end of its allocation; every piece is now terminated at index length.
 *
 * @param real_line      raw text, not necessarily NUL-terminated
 * @param real_line_len  number of bytes of real_line to consider
 * @param filename       stored in each entry as given (not copied)
 * @param line_number    number assigned to the first piece
 */
void add_listing_line (char *real_line, unsigned long real_line_len, const char *filename, unsigned long line_number) {

    unsigned long start = 0, i;
    
    for (i = 0; i < real_line_len; i++) {
    
        if (real_line[i] != '\n') {
            continue;
        }
        
        internal_add_line (copy_listing_segment (real_line + start, i - start), filename, line_number);
        line_number++;
        
        /* The chunk ended exactly on a newline: nothing is left over. */
        if (i == real_line_len - 1) {
            return;
        }
        
        start = i + 1;
    
    }
    
    internal_add_line (copy_listing_segment (real_line + start, i - start), filename, line_number);

}
size = ll->size; + } + + fprintf (f, "%05lu ", ll->line_number); + + for (i = 0; i < size; i++) { + + if ((i > 0) && ((i % 16) == 0)) { + fprintf (f, "\n%05lu ", ll->line_number); + } + + fprintf (f, "%02X", ll->frag->buf[ll->where + i]); + + } + + if ((i > 0) && ((i % 16) == 0)) { + fprintf (f, "\n%05lu ", ll->line_number); + } + + i %= 16; + + for ( ; i < 20; i++) { + fprintf (f, " "); + } + + if (ll->frag) { + fprintf (f, "%04lX ", ll->frag->address + ll->where); + } else { + fprintf (f, " "); + } + + fprintf (f, " %s\n", ll->line); + + for (lm = ll->messages; lm; lm = lm->next) { + fprintf (f, "***** %s\n", lm->message); + } + + } + + if (symbols != NULL) { + + unsigned long local_symbols = 0; + unsigned long global_symbols = 0; + unsigned long undefined_symbols = 0; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol)) { + undefined_symbols++; + } else if (symbol_is_external (symbol)) { + global_symbols++; + } else { + local_symbols++; + } + + } + + if (local_symbols > 0) { + + fprintf (f, "\nLOCAL SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol) || symbol_is_external (symbol)) { + continue; + } + + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + + } + + } else { + fprintf (f, "\nNO LOCAL SYMBOLS\n"); + } + + if (global_symbols > 0) { + + fprintf (f, "\nGLOBAL SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol) || !symbol_is_external (symbol)) { + continue; + } + + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + + } + + } else { + fprintf (f, "\nNO GLOBAL SYMBOLS\n"); + } + + if (undefined_symbols > 0) { + + fprintf (f, "\nEXTERNAL 
SYMBOLS:\n\n"); + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (symbol_is_section_symbol (symbol)) { + continue; + } + + if (symbol_is_undefined (symbol)) { + fprintf (f, " %08lx %s\n", symbol_get_value (symbol), symbol_get_name (symbol)); + } + + } + + } else { + fprintf (f, "\nNO EXTERNAL SYMBOLS\n"); + } + + } + + if (state->lfile) { fclose (f); } + +} + +void update_listing_line (struct frag *frag) { + + if (last_line == NULL || last_line->frag == NULL) { return; } + + if (last_line->frag->next == frag) { + last_line->variant_frag = 1; + } else { + last_line->size = last_line->frag->fixed_size - last_line->where; + } + +} diff --git a/listing.h b/listing.h new file mode 100644 index 0000000..9e965db --- /dev/null +++ b/listing.h @@ -0,0 +1,15 @@ +/****************************************************************************** + * @file listing.h + *****************************************************************************/ +#ifndef _LISTING_H +#define _LISTING_H + +#include "frag.h" + +void add_listing_line (char *real_line, unsigned long real_line_len, const char *filename, unsigned long line_number); +void add_listing_message (char *message, const char *filename, unsigned long line_number); + +void generate_listing (void); +void update_listing_line (struct frag *frag); + +#endif /* _LISTING_H */ diff --git a/ll.c b/ll.c new file mode 100644 index 0000000..cb0833f --- /dev/null +++ b/ll.c @@ -0,0 +1,316 @@ +/****************************************************************************** + * @file ll.c + *****************************************************************************/ +#include +#include +#include +#include + +#include "ll.h" +#include "report.h" + +struct load_line_data { + + char *line, *real_line; + + unsigned long capacity, read_size; + unsigned long end_of_prev_real_line; + + unsigned long *new_line_number_p; + +}; + +#define CAPACITY_INCREMENT 256 +extern void get_filename_and_line_number (const char **__filename_p, 
/**
 * Read the next logical line from `ifp`.
 *
 * Two buffers are kept in the state object: `real_line` holds the raw bytes
 * read from the file and `line` holds the processed text.  Processing, as
 * done below: ';' outside quotes starts a comment that is dropped up to the
 * newline; a tab outside quotes is expanded with spaces to the next multiple
 * of four in the processed line; a '\r' directly before the newline is
 * removed; a backslash joins the following physical line; text inside single
 * or double quotes is copied unchanged.
 *
 * Outputs (all point into buffers owned by the state object, which may be
 * reallocated or overwritten by the next call):
 *   *line_p           start of the processed line
 *   *line_end_p       the '\n' that terminates the processed line (a '\0'
 *                     follows it)
 *   *real_line_p      start of the raw text
 *   *real_line_len_p  number of raw bytes consumed for this line
 *   *newlines_p       number of physical line breaks joined by backslashes
 *
 * @param load_line_internal_data_p  address of the state pointer obtained
 *                                   from load_line_create_internal_data ()
 * @return 0 when a line was produced, 1 at end of file with nothing left to
 *         return, 4 when ferror () reports a read error.
 */
int load_line (char **line_p, char **line_end_p, char **real_line_p, unsigned long *real_line_len_p, unsigned long *newlines_p, FILE *ifp, void **load_line_internal_data_p) {

    struct load_line_data *ll_data = *load_line_internal_data_p;
    unsigned long pos_in_line = 0, pos_in_real_line = 0, newlines = 0;
    
    int in_escape = 0, in_double_quote = 0, in_single_quote = 0;
    int in_line_comment = 0, skipping_spaces = 0;
    
    /* Discard the raw bytes handed out by the previous call and move the
     * unread remainder to the front of the raw buffer. */
    if (ll_data->end_of_prev_real_line) {
    
        memmove (ll_data->real_line, ll_data->real_line + ll_data->end_of_prev_real_line, ll_data->read_size - ll_data->end_of_prev_real_line);
        ll_data->read_size -= ll_data->end_of_prev_real_line;
    
    }
    
    while (1) {
    
        /* Grow both buffers together; `line` gets one extra byte so that a
         * '\n' and a '\0' can always be appended. */
        if (pos_in_line >= ll_data->capacity || pos_in_real_line >= ll_data->capacity) {
        
            ll_data->capacity += CAPACITY_INCREMENT;
            
            ll_data->line = xrealloc (ll_data->line, ll_data->capacity + 2);
            ll_data->real_line = xrealloc (ll_data->real_line, ll_data->capacity + 1);
        
        }
        
        /* All buffered raw bytes consumed: top the raw buffer up from the file. */
        if (pos_in_real_line >= ll_data->read_size) {
        
            ll_data->read_size = fread (ll_data->real_line + pos_in_real_line, 1, ll_data->capacity - pos_in_real_line, ifp) + pos_in_real_line;
            
            if (ferror (ifp)) {
                return 4;
            }
            
            ll_data->real_line[ll_data->read_size] = '\0';
        
        }
        
    copying:
        
        /* Inside a ';' comment: skip raw bytes up to (not including) the
         * newline.  The flag stays set if the buffer runs out first, so the
         * skip resumes after the next refill. */
        if (in_line_comment) {
        
            while (pos_in_real_line < ll_data->read_size) {
            
                if (ll_data->real_line[pos_in_real_line] == '\n') {
                
                    in_line_comment = 0;
                    break;
                
                }
                
                pos_in_real_line++;
            
            }
        
        }
        
        /* skipping_spaces is never set: the only code that did so is
         * commented out further down. */
        if (skipping_spaces) {
        
            while (pos_in_real_line < ll_data->read_size) {
            
                if (ll_data->real_line[pos_in_real_line] != ' ' && ll_data->real_line[pos_in_real_line] != '\t') {
                
                    skipping_spaces = 0;
                    break;
                
                }
                
                pos_in_real_line++;
            
            }
        
        }
        
        while (pos_in_real_line < ll_data->read_size && pos_in_line < ll_data->capacity) {
        
            /* Copy first, then decide what the character means. */
            ll_data->line[pos_in_line] = ll_data->real_line[pos_in_real_line++];
            
            if (in_double_quote || in_single_quote) {
            
                /* Track escapes so that an escaped quote does not close the string. */
                if (in_escape) {
                    in_escape = 0;
                } else if (in_double_quote && ll_data->line[pos_in_line] == '"') {
                    in_double_quote = 0;
                } else if (in_single_quote && ll_data->line[pos_in_line] == '\'') {
                    in_single_quote = 0;
                } else if (ll_data->line[pos_in_line] == '\\') {
                    in_escape = 1;
                }
                
                if (ll_data->line[pos_in_line] == '\n') {
                
                    int pos = pos_in_line;
                    
                    /* Turn "\r\n" into "\n". */
                    if (pos > 0 && ll_data->line[pos - 1] == '\r') {
                        ll_data->line[--pos] = '\n';
                    }
                    
                    /* NOTE(review): when pos is 0 neither branch runs and the
                     * newline is kept as ordinary text - confirm intended. */
                    if (pos > 0) {
                    
                        if (ll_data->line[pos - 1] != '\\') {
                        
                            /* A newline ends the line even inside an open string. */
                            ll_data->line[pos + 1] = '\0';
                            ll_data->end_of_prev_real_line = pos_in_real_line;
                            
                            *line_p = ll_data->line;
                            *line_end_p = ll_data->line + pos;
                            
                            *real_line_p = ll_data->real_line;
                            *real_line_len_p = pos_in_real_line;
                            
                            *newlines_p = newlines;
                            return 0;
                        
                        } else {
                        
                            /* Backslash-newline inside a string: drop both
                             * characters and carry on with the next line. */
                            pos_in_line = pos - 1;
                            
                            newlines++;
                            goto copying;
                        
                        }
                    
                    }
                
                }
            
            } else {
            
                if (ll_data->line[pos_in_line] == ' ' || ll_data->line[pos_in_line] == '\t') {
                
                    /*ll_data->line[pos_in_line++] = ' ';*/
                    
                    /*skipping_spaces = 1;
                    goto copying;*/
                    
                    if (ll_data->line[pos_in_line] == '\t') {
                    
                        /* Spaces needed to reach the next multiple of four;
                         * the first one overwrites the tab itself. */
                        int cnt = 4 - (pos_in_line % 4);
                        int i = 0;
                        
                        /* Buffers grow on every tab so the expansion fits. */
                        ll_data->capacity += CAPACITY_INCREMENT;
                        
                        ll_data->line = xrealloc (ll_data->line, ll_data->capacity + 2);
                        ll_data->real_line = xrealloc (ll_data->real_line, ll_data->capacity + 1);
                        
                        for (; i < cnt; i++) {
                            ll_data->line[pos_in_line++] = ' ';
                        }
                        
                        /* pos_in_line already advanced: skip the increment below. */
                        continue;
                    
                    }
                
                } else if (ll_data->line[pos_in_line] == '\n') {
                
                    /* Turn "\r\n" into "\n". */
                    if (pos_in_line > 0 && ll_data->line[pos_in_line - 1] == '\r') {
                        ll_data->line[--pos_in_line] = '\n';
                    }
                    
                    ll_data->line[pos_in_line + 1] = '\0';
                    ll_data->end_of_prev_real_line = pos_in_real_line;
                    
                    *line_p = ll_data->line;
                    *line_end_p = ll_data->line + pos_in_line;
                    
                    *real_line_p = ll_data->real_line;
                    *real_line_len_p = pos_in_real_line;
                    
                    *newlines_p = newlines;
                    return 0;
                
                } else if (ll_data->line[pos_in_line] == '\\') {
                
                    /*ll_data->line[pos_in_line] = ' ';*/
                    /* NOTE(review): stepping back makes the next copied
                     * character overwrite the one before the backslash, and
                     * the unsigned index wraps when the backslash is the
                     * first character - confirm both are intended. */
                    pos_in_line--;
                    
                    /* Skip the rest of the physical line including its line
                     * ending ("\r", "\n" or "\r\n"). */
                    while (pos_in_real_line < ll_data->read_size) {
                    
                        if (ll_data->real_line[pos_in_real_line] == '\r' || ll_data->real_line[pos_in_real_line] == '\n') {
                        
                            if (ll_data->real_line[pos_in_real_line] == '\r') {
                                pos_in_real_line++;
                            }
                            
                            if (ll_data->real_line[pos_in_real_line] == '\n') {
                                pos_in_real_line++;
                            }
                            
                            break;
                        
                        }
                        
                        pos_in_real_line++;
                    
                    }
                    
                    newlines++;
                    continue;
                
                } else if (ll_data->line[pos_in_line] == '"') {
                    in_double_quote = 1;
                } else if (ll_data->line[pos_in_line] == '\'') {
                    in_single_quote = 1;
                } else if (ll_data->line[pos_in_line] == ';') {
                
                    /* pos_in_line is not advanced, so the newline copied after
                     * the comment is skipped overwrites the ';'. */
                    in_line_comment = 1;
                    goto copying;
                
                }
            
            }
            
            pos_in_line++;
        
        }
        
        if (feof (ifp)) {
        
            const char *filename;
            unsigned long line_number;
            
            /* Nothing buffered at end of file: no more lines. */
            if (ll_data->read_size == 0) {
                return 1;
            }
            
            /* The last line lacks a newline: supply one and warn. */
            ll_data->line[pos_in_line] = '\n';
            ll_data->line[pos_in_line + 1] = '\0';
            
            get_filename_and_line_number (&filename, &line_number);
            
            /* Only the filename of the global position is used; the line
             * number comes from the caller-supplied pointer, or is 0. */
            if (ll_data->new_line_number_p) {
                line_number = *(ll_data->new_line_number_p);
            } else {
                line_number = 0;
            }
            
            report_at (filename, line_number, REPORT_WARNING, "end of file not at end of line; newline inserted");
            
            /* Mark the raw buffer as empty so that the next call returns 1. */
            ll_data->end_of_prev_real_line = 0;
            ll_data->read_size = 0;
            
            *line_p = ll_data->line;
            *line_end_p = ll_data->line + pos_in_line;
            
            *real_line_p = ll_data->real_line;
            *real_line_len_p = pos_in_real_line;
            
            *newlines_p = newlines;
            return 0;
        
        }
    
    }

}
NULL; + ll_data->real_line = NULL; + + ll_data->read_size = 0; + ll_data->end_of_prev_real_line = 0; + + ll_data->new_line_number_p = new_line_number_p; + return ll_data; + +} diff --git a/ll.h b/ll.h new file mode 100644 index 0000000..b6754ef --- /dev/null +++ b/ll.h @@ -0,0 +1,13 @@ +/****************************************************************************** + * @file ll.h + *****************************************************************************/ +#ifndef _LL_H +#define _LL_H + +#include +int load_line (char **line_p, char **line_end_p, char **real_line_p, unsigned long *real_line_len_p, unsigned long *newlines_p, FILE *ifp, void **load_line_internal_data_p); + +void load_line_destroy_internal_data (void *load_line_internal_data); +void *load_line_create_internal_data (unsigned long *new_line_number_p); + +#endif /* _LL_H */ diff --git a/macro.c b/macro.c new file mode 100644 index 0000000..f196af6 --- /dev/null +++ b/macro.c @@ -0,0 +1,555 @@ +/****************************************************************************** + * @file macro.c + *****************************************************************************/ +#include +#include +#include + +#include "cstr.h" +#include "hashtab.h" +#include "lex.h" +#include "lib.h" +#include "macro.h" +#include "report.h" +#include "vector.h" + +static struct hashtab hashtab_macros = { 0 }; + +void remove_all_macros (void) { + + struct macro *m; + int i; + + for (i = 0; i < hashtab_macros.capacity; i++) { + + if (!(m = (struct macro *) &hashtab_macros.entries[i])) { + continue; + } + + free (m->name); + free (m->value); + + } + + memset (&hashtab_macros, 0, sizeof (hashtab_macros)); + +} + +struct hashtab_name *find_macro (char *sname) { + + struct hashtab_name *key; + + if ((key = hashtab_get_key (&hashtab_macros, sname))) { + return key; + } + + return 0; + +} + +struct macro *get_macro (struct hashtab_name *key) { + return hashtab_get (&hashtab_macros, key); +} + +void add_macro (char *start, char **pp, 
int report_line) { + + char *sname, *caret = *pp, *arg; + unsigned int len; + + struct hashtab_name *key; + struct macro *m; + + if (is_end_of_line[(int) **pp]) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret - 1, "no macro name give in %%define directive"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "no macro name give in %%define directive"); + } + + return; + + } + + if (!(sname = symname (pp))) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro names must be identifiers"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "macro names must be identifiers"); + } + + return; + + } + + if (strcmp (sname, "defined") == 0) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "\"%s\" cannout be used as a macro name", sname); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "\"%s\" cannout be used as a macro name", sname); + } + + return; + + } + + if (**pp != '(' && !isspace ((int) **pp)) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "whitespace is required after macro name"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "whitespace is required after macro name"); + } + + return; + + } + + if ((key = find_macro (sname))) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, caret, "\"%s\" redefined", sname); + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "\"%s\" redefined", sname); + } + + if ((m = hashtab_get (&hashtab_macros, key))) { + + while ((arg = vec_pop (&m->args))) { + free (arg); + } + + free (m); + + } + + hashtab_remove (&hashtab_macros, key); + + } else { + + if (!(key = hashtab_alloc_name (sname))) { + + free (sname); + return; + 
+ } + + } + + m = xmalloc (sizeof (*m)); + m->nargs = -1; + + m->type = MACRO_USER; + m->name = sname; + + if (**pp == '(') { + + m->nargs = 0; + (*pp)++; + + while (!is_end_of_line[(int) **pp]) { + + *pp = skip_whitespace (*pp); + + if (**pp == ')') { + break; + } + + if (m->is_variadic) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ')' after '...'"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ')' after '...'"); + } + + while ((arg = vec_pop (&m->args))) { + free (arg); + } + + free (m); + return; + + } + + arg = *pp; + + while (!is_end_of_line[(int) *arg] && !isspace ((int) *arg)) { + + if (*arg == ',' || *arg == ')') { + break; + } + + arg++; + + } + + if (arg - *pp == 3) { + + if (memcmp (*pp, "...", 3) == 0) { + + m->is_variadic = 1; + + *pp = arg; + continue; + + } + + } + + if ((sname = symname (pp))) { + + vec_push (&m->args, sname); + m->nargs++; + + *pp = skip_whitespace (*pp); + + if (**pp != ',' && **pp != ')') { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ',' or, ')' after parameter"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ',' or, ')' after parameter"); + } + + goto err; + + } + + if (**pp == ')') { + break; + } + + (*pp)++; + continue; + + } + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected parameter name"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected parameter name"); + } + + goto err; + + } + + if (**pp != ')') { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "expected ')' before end of line"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected ')' before end of line"); + } + + goto err; + + } + + (*pp)++; + + } + + *pp = 
skip_whitespace (*pp); + + m->value = xstrdup (*pp); + len = strlen (m->value); + + if (is_end_of_line[(int) m->value[len - 1]]) { + m->value[len - 1] = '\0'; + } + + hashtab_put (&hashtab_macros, key, m); + + if (!m->is_variadic) { + + char *haystack = m->value, *needle = "__VA_ARGS__"; + char *p; + + while ((p = strstr (haystack, needle))) { + + haystack = (p + strlen (needle)); + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, m->value, p, "%s can only appear in the expansion of a variadic macro", needle); + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "%s can only appear in the expansion of a variadic macro", needle); + } + + } + + } + + return; + +err: + + while ((arg = vec_pop (&m->args))) { + free (arg); + } + + free (m); + return; + +} + +void remove_macro (char *start, char **pp, int report_line) { + + char *sname, *caret = *pp; + + struct hashtab_name *key; + struct macro *mp; + + if (!(sname = symname (pp))) { + + if (!(sname = symname (pp))) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro names must be identifiers"); + } else { + report_at (get_filename (), get_line_number (), REPORT_ERROR, "macro names must be identifiers"); + } + + return; + + } + + return; + + } + + if ((key = find_macro (sname))) { + + if ((mp = hashtab_get (&hashtab_macros, key))) { + free (mp); + } + + hashtab_remove (&hashtab_macros, key); + + } + + *pp = skip_whitespace (*pp); + + if (!is_end_of_line[(int) **pp]) { + + if (report_line) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%undef directive"); + } else { + report_at (get_filename (), get_line_number (), REPORT_WARNING, "extra tokens at end of %%undef directive"); + } + + } + +} + +static struct vector *get_macro_args (char *start, char *macro_name, char **pp) { + + static struct vector args_list = { 0 }; + char *arg, 
saved_ch, ch; + + memset (&args_list, 0, sizeof (args_list)); + + if (**pp == '(') { + + (*pp)++; + + for (;;) { + + *pp = skip_whitespace (*pp); + + if (is_end_of_line[(int) **pp] || **pp == ')') { + break; + } + + arg = *pp; + ch = *arg; + + if (ch == '"' || ch == '\'') { + + (*pp)++; + + while (!is_end_of_line[(int) **pp]) { + + if (**pp == '\\') { + + (*pp)++; + + if (**pp == ch) { + (*pp)++; + } + + continue; + + } + + if (**pp == ch) { break; } + (*pp)++; + + } + + if (**pp == ch) { + (*pp)++; + } else { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, arg, "missing terminating %c character", ch); + } + + } else { + + while (!is_end_of_line[(int) **pp] && **pp != ' ' && **pp != '\t') { + + if (**pp == ',' || **pp == ')') { + break; + } + + (*pp)++; + + } + + } + + saved_ch = **pp; + **pp = '\0'; + + vec_push (&args_list, xstrdup (arg)); + + **pp = saved_ch; + + if (*(*pp = skip_whitespace (*pp)) == ',') { + (*pp)++; + } + + } + + if (**pp != ')') { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, *pp, "unterminated argument list invoking macro \"%s\"", macro_name); + return 0; + + } + + (*pp)++; + + } + + return &args_list; + +} + +static char *process_value (struct macro *m, struct vector *args_list) { + + char *line = m->value, *arg; + int i; + + struct cstring str; + cstr_new (&str); + + while (!is_end_of_line[(int) *line]) { + again: + + if (line[0] == '#' && line[1] == '#') { + + if (str.size && ((char *) str.data)[str.size - 1] == ' ') { str.size--; } + + line += 2; + continue; + + } + + if (is_name_beginner ((int) *line)) { + + arg = symname (&line); + + if (strcmp (arg, "__VA_ARGS__") == 0) { + + for (i = m->nargs; i < args_list->length; i++) { + + arg = args_list->data[i]; + cstr_cat (&str, arg, strlen (arg)); + + if (i < args_list->length - 1) { + + cstr_ccat (&str, ','); + cstr_ccat (&str, ' '); + + } + + } + + continue; + + } + + for (i = 0; i < m->nargs; i++) { + + if (strcmp 
(m->args.data[i], arg) == 0) { + + arg = args_list->data[i]; + cstr_cat (&str, arg, strlen (arg)); + + goto again; + + } + + } + + cstr_cat (&str, arg, strlen (arg)); + + } else { + cstr_ccat (&str, *line++); + } + + } + + cstr_ccat (&str, '\0'); + return xstrdup (str.data); + +} + +char *process_macro (char *start, char **pp, struct macro *m) { + + struct vector *args_list; + char *caret; + + if (m->nargs > 0 || m->is_variadic) { + + args_list = 0; + caret = *pp; + + if ((args_list = get_macro_args (start, m->name, pp))) { + + if (args_list->length < m->nargs) { + + char *tmp = (m->nargs == 1 ? " argument" : " arguments"); + char *tmp2 = (args_list->length == 1 ? "only " : ""); + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "macro \"%s\" requires %d%s, but %s%d given", m->name, m->nargs, tmp, tmp2, args_list->length); + + } else if (args_list->length > m->nargs) { + + if (!m->is_variadic) { + + char *tmp = (args_list->length == 1 ? " argument" : " arguments"); + char *tmp2 = (m->nargs == 1 ? 
/**
 * Store `value` into `arr` as `size` bytes, least significant byte first.
 *
 * @param arr    destination, at least `size` bytes long
 * @param value  number to serialise
 * @param size   number of bytes to write; nothing is written when <= 0
 *
 * The value is consumed eight bits at a time rather than shifted by 8 * i,
 * so a `size` larger than sizeof (unsigned long) simply pads with zero bytes
 * instead of shifting by the full width of the type or more, which is
 * undefined.
 */
static void write_to_byte_array (unsigned char *arr, unsigned long value, int size) {

    int i;
    
    for (i = 0; i < size; i++) {
    
        arr[i] = (unsigned char) (value & 0xff);
        
        /* Shift in two steps of four bits: always a valid shift count. */
        value >>= 4;
        value >>= 4;
    
    }

}
int output_relocation (struct fixup *fixup, unsigned long start_address_of_section, FILE *fp) {
+
+    /*
+     * Writes one relocation record for `fixup` to `fp`.  The record address
+     * is the fixup position minus `start_address_of_section`.  Returns 0 on
+     * success and 1 when the record could not be written.
+     *
+     * r_symbolnum as built below: low bits hold either an N_* section type
+     * (section symbols) or the index of the symbol among the symbols that
+     * are emitted to the symbol table; bit 27 marks a far call, bit 28 a
+     * pc-relative fixup, bits 29-30 hold log2 of the fixup size and bit 31
+     * marks the symbol-index form.
+     */
+    struct relocation_info reloc;
+    
+    long log2_of_size, size;
+    unsigned long r_symbolnum;
+    
+    write_to_byte_array (reloc.r_address, fixup->frag->address + fixup->where - start_address_of_section, 4);
+    
+    if (symbol_is_section_symbol (fixup->add_symbol)) {
+    
+        if (symbol_get_section (fixup->add_symbol) == text_section) {
+            r_symbolnum = N_TEXT;
+        } else if (symbol_get_section (fixup->add_symbol) == data_section) {
+            r_symbolnum = N_DATA;
+        } else if (symbol_get_section (fixup->add_symbol) == bss_section) {
+            r_symbolnum = N_BSS;
+        } else {
+        
+            report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid section %s", section_get_name (symbol_get_section (fixup->add_symbol)));
+            exit (EXIT_FAILURE);
+        
+        }
+    
+    } else {
+    
+        struct symbol *symbol;
+        long symbol_number;
+        
+        /* Count only the symbols output_obj writes to the symbol table. */
+        for (symbol = symbols, symbol_number = 0; symbol && (symbol != fixup->add_symbol); symbol = symbol->next) {
+        
+            if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) {
+                symbol_number++;
+            }
+        
+        }
+        
+        r_symbolnum  = symbol_number;
+        r_symbolnum |= (1LU << 31);
+    
+    }
+    
+    if (fixup->pcrel) {
+        r_symbolnum |= (1LU << 28);
+    }
+    
+    /* log2_of_size ends up as the index of the highest set bit of fixup->size. */
+    for (log2_of_size = -1, size = fixup->size; size; size >>= 1, log2_of_size++);
+    r_symbolnum |= ((unsigned long) log2_of_size << 29);
+    
+    if (fixup->reloc_type == RELOC_TYPE_FAR_CALL) {
+        r_symbolnum |= (1LU << 27);
+    }
+    
+    write_to_byte_array (reloc.r_symbolnum, r_symbolnum, 4);
+    
+    if (fwrite (&reloc, sizeof (reloc), 1, fp) != 1) {
+    
+        report_at (program_name, 0, REPORT_ERROR, "error writing text relocations");
+        return 1;
+    
+    }
+    
+    return 0;
+
+}
+
+/*
+ * Writes the assembled program to `fp` as an object file.
+ *
+ * Order of output: space for the header is skipped, then the text and data
+ * fragments, the text and data relocations, the symbol table and the string
+ * table are written.  Finally the stream is rewound and the completed
+ * header is written at the start.  On any I/O failure an error is reported
+ * and the function returns early.
+ */
+void output_obj (FILE *fp) {
+
+    unsigned long start_address_of_data;
+    struct fixup *fixup;
+    
+    unsigned long symbol_table_size;
+    struct symbol *symbol;
+    
+    unsigned long string_table_pos;
+    unsigned char string_table_size[4];
+    
+    struct frag *frag;
+    
+    unsigned long text_size, data_size, bss_size;
+    unsigned long tr_size, dr_size;
+    
+    struct exec header;
+    memset (&header, 0, sizeof (header));
+    
+    write_to_byte_array (header.a_info, 0x00640000 | MAGIC, 4);
+    
+    if ((symbol = state->end_symbol)) {
+        write_to_byte_array (header.a_entry, symbol_get_value (symbol), 4);
+    }
+    
+    /* The header is written last, once all sizes are known. */
+    if (fseek (fp, sizeof (header), SEEK_SET)) {
+    
+        report_at (program_name, 0, REPORT_ERROR, "failed whilst seeking passed header");
+        return;
+    
+    }
+    
+    section_set (text_section);
+    text_size = 0;
+    
+    for (frag = current_frag_chain->first_frag; frag; frag = frag->next) {
+    
+        if (frag->fixed_size == 0) {
+            continue;
+        }
+        
+        if (fwrite (frag->buf, frag->fixed_size, 1, fp) != 1) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "failed whilst writing text");
+            return;
+        
+        }
+        
+        text_size += frag->fixed_size;
+    
+    }
+    
+    write_to_byte_array (header.a_text, text_size, 4);
+    
+    section_set (data_section);
+    data_size = 0;
+    
+    for (frag = current_frag_chain->first_frag; frag; frag = frag->next) {
+    
+        if (frag->fixed_size == 0) {
+            continue;
+        }
+        
+        if (fwrite (frag->buf, frag->fixed_size, 1, fp) != 1) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "failed whilst writing data");
+            return;
+        
+        }
+        
+        data_size += frag->fixed_size;
+    
+    }
+    
+    write_to_byte_array (header.a_data, data_size, 4);
+    
+    /* The bss section contributes only its size; no bytes are written. */
+    section_set (bss_section);
+    bss_size = 0;
+    
+    for (frag = current_frag_chain->first_frag; frag; frag = frag->next) {
+    
+        if (frag->fixed_size == 0) {
+            continue;
+        }
+        
+        bss_size += frag->fixed_size;
+    
+    }
+    
+    write_to_byte_array (header.a_bss, bss_size, 4);
+    
+    section_set (text_section);
+    tr_size = 0;
+    
+    start_address_of_data = 0;
+    
+    for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) {
+    
+        if (fixup->done) {
+            continue;
+        }
+        
+        if (output_relocation (fixup, start_address_of_data, fp)) {
+            return;
+        }
+        
+        tr_size += sizeof (struct relocation_info);
+    
+    }
+    
+    write_to_byte_array (header.a_trsize, tr_size, 4);
+    
+    section_set (data_section);
+    dr_size = 0;
+    
+    /* Data relocation addresses are relative to the first data fragment. */
+    start_address_of_data = current_frag_chain->first_frag->address;
+    
+    for (fixup = current_frag_chain->first_fixup; fixup; fixup = fixup->next) {
+    
+        if (fixup->done) {
+            continue;
+        }
+        
+        if (output_relocation (fixup, start_address_of_data, fp)) {
+            return;
+        }
+        
+        dr_size += sizeof (struct relocation_info);
+    
+    }
+    
+    write_to_byte_array (header.a_drsize, dr_size, 4);
+    
+    symbol_table_size = 0;
+    
+    /* String offsets start at 4: the table begins with its 4-byte size. */
+    string_table_pos = 4;
+    
+    for (symbol = symbols; symbol; symbol = symbol->next) {
+    
+        if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) {
+        
+            struct nlist symbol_entry;
+            memset (&symbol_entry, 0, sizeof (symbol_entry));
+            
+            write_to_byte_array (symbol_entry.n_strx, string_table_pos, 4);
+            string_table_pos += strlen (symbol->name) + 1;
+            
+            if (symbol->section == undefined_section) {
+                symbol_entry.n_type = N_UNDF;
+            } else if (symbol->section == text_section) {
+                symbol_entry.n_type = N_TEXT;
+            } else if (symbol->section == data_section) {
+                symbol_entry.n_type = N_DATA;
+            } else if (symbol->section == bss_section) {
+                symbol_entry.n_type = N_BSS;
+            } else if (symbol->section == absolute_section) {
+                symbol_entry.n_type = N_ABS;
+            } else {
+            
+                report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "invalid section %s", section_get_name (symbol->section));
+                exit (EXIT_FAILURE);
+            
+            }
+            
+            write_to_byte_array (symbol_entry.n_value, symbol_get_value (symbol), 4);
+            symbol_entry.n_type |= N_EXT;
+            
+            if (fwrite (&symbol_entry, sizeof (symbol_entry), 1, fp) != 1) {
+            
+                report_at (program_name, 0, REPORT_ERROR, "error writing symbol table");
+                return;
+            
+            }
+            
+            symbol_table_size += sizeof (symbol_entry);
+        
+        }
+    
+    }
+    
+    write_to_byte_array (header.a_syms, symbol_table_size, 4);
+    
+    /*
+     * Serialise the string table size the same way as every other field.
+     * Writing the first four bytes of the unsigned long directly would
+     * emit the wrong bytes on a big-endian host.
+     */
+    write_to_byte_array (string_table_size, string_table_pos, 4);
+    
+    if (fwrite (string_table_size, sizeof (string_table_size), 1, fp) != 1) {
+    
+        report_at (program_name, 0, REPORT_ERROR, "failed to write string table");
+        return;
+    
+    }
+    
+    for (symbol = symbols; symbol; symbol = symbol->next) {
+    
+        if (symbol_is_external (symbol) || symbol_is_undefined (symbol)) {
+        
+            if (fwrite (symbol->name, strlen (symbol->name) + 1, 1, fp) != 1) {
+            
+                report_at (program_name, 0, REPORT_ERROR, "failed to write string table");
+                return;
+            
+            }
+        
+        }
+    
+    }
+    
+    rewind (fp);
+    
+    if (fwrite (&header, sizeof (header), 1, fp) != 1) {
+    
+        report_at (program_name, 0, REPORT_ERROR, "failed to write header");
+        return;
+    
+    }
+
+}
diff --git a/obj.h b/obj.h
new file mode 100644
index 0000000..a92812e
--- /dev/null
+++ b/obj.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ * @file            obj.h
+ *****************************************************************************/
+#ifndef     _OBJ_H
+#define     _OBJ_H
+
+struct exec {
+
+    unsigned char a_info[4];
+    unsigned char a_text[4];
+    unsigned char a_data[4];
+    unsigned char a_bss[4];
+    unsigned char a_syms[4];
+    unsigned char a_entry[4];
+    unsigned char a_trsize[4];
+    unsigned char a_drsize[4];
+
+};
+
+#define     MAGIC                       0471
+
+struct relocation_info {
+
+    unsigned char r_address[4];
+    unsigned char r_symbolnum[4];
+
+};
+
+#define     N_UNDF                      0x00
+#define     N_ABS                       0x02
+#define     N_TEXT                      0x04
+#define     N_DATA                      0x06
+#define     N_BSS                       0x08
+
+struct nlist {
+
+    unsigned char n_strx[4];
+    unsigned char n_type;
+    
+    unsigned char n_value[4];
+
+};
+
+#define     N_EXT                       0x01
+
+#endif      /* _OBJ_H */
diff --git a/process.c b/process.c
new file mode 100644
index 0000000..c0a38c8
--- /dev/null
+++ b/process.c
@@ -0,0 +1,1494 @@
+/******************************************************************************
+ * @file            process.c
+ *****************************************************************************/
+#include    <ctype.h>
+#include    <stdio.h>
+#include    <stdlib.h>
+#include    <string.h>
+#include    <time.h>
+
+#include    "as.h"
+#include    "cstr.h"
+#include    "eval.h"
+#include    "expr.h"
+#include    "frag.h"
+#include    "hashtab.h"
+#include    "kwd.h"
+#include    "lex.h"
+#include    "lib.h"
+#include    "listing.h"
+#include    "ll.h"
+#include    "macro.h"
+#include    "process.h"
+#include    "report.h"
+#include    "section.h"
+#include    "symbol.h"
+#include    "vector.h"
+
+struct pp_pseudo_op_entry {
+
+    const char *name;
+    void (*handler) (char *start, char 
**pp); + +}; + +static struct vector vec_include_paths = { 0 }; + +struct cond { + + char *directive; + int ignore_line; + + char *filename; + unsigned long line_number; + + int has_else; + +}; + +static struct vector vec_ifstack = { 0 }; +static int ignore_line = 0, iflevel = 0; + +static void handler_if (char *start, char **pp) { + + struct cond *cond; + + if (!ignore_line) { + + cond = xmalloc (sizeof (*cond)); + + cond->ignore_line = ignore_line; + cond->directive = xstrdup ("if"); + + cond->filename = xstrdup (get_filename ()); + cond->line_number = get_line_number (); + + vec_push (&vec_ifstack, cond); + ignore_line = !eval (start, pp); + + } else { + iflevel++; + } + +} + +static void handler_ifdef (char *start, char **pp) { + + struct cond *cond; + char *sname, *caret; + + if (!ignore_line) { + + cond = xmalloc (sizeof (*cond)); + + cond->ignore_line = ignore_line; + cond->directive = xstrdup ("ifdef"); + + cond->filename = xstrdup (get_filename ()); + cond->line_number = get_line_number (); + + vec_push (&vec_ifstack, cond); + *pp = skip_whitespace (*pp); + + if (is_name_beginner ((int) **pp)) { + + caret = (*pp); + + while (is_name_part ((int) **pp)) { + (*pp)++; + } + + sname = xstrndup (caret, *pp - caret); + ignore_line = (find_macro (sname) == NULL); + + free (sname); + + } + + *pp = skip_whitespace (*pp); + + if (!is_end_of_line[(int) **pp]) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%ifdef directive"); + } + + } else { + iflevel++; + } + +} + +static void handler_ifndef (char *start, char **pp) { + + struct cond *cond; + char *sname, *caret; + + if (!ignore_line) { + + cond = xmalloc (sizeof (*cond)); + + cond->ignore_line = ignore_line; + cond->directive = xstrdup ("ifndef"); + + cond->filename = xstrdup (get_filename ()); + cond->line_number = get_line_number (); + + vec_push (&vec_ifstack, cond); + *pp = skip_whitespace (*pp); + + if (is_name_beginner ((int) **pp)) { + + caret = 
(*pp); + + while (is_name_part ((int) **pp)) { + (*pp)++; + } + + sname = xstrndup (caret, *pp - caret); + ignore_line = (find_macro (sname) != NULL); + + free (sname); + + } + + *pp = skip_whitespace (*pp); + + if (!is_end_of_line[(int) **pp]) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%ifndef directive"); + } + + } else { + iflevel++; + } + +} + +static void handler_elif (char *start, char **pp) { + + struct cond *cond; + + if (!iflevel) { + + if (vec_ifstack.length == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%elif without %%if"); + return; + + } else { + + cond = vec_ifstack.data[vec_ifstack.length - 1]; + + if (cond->has_else > 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%elif after %%else"); + return; + + } + + } + + ignore_line = (ignore_line && !eval (start, pp)); + + } + +} + +static void handler_else (char *start, char **pp) { + + struct cond *cond; + + if (!iflevel) { + + if (vec_ifstack.length == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%else without %%if"); + return; + + } else { + + cond = vec_ifstack.data[vec_ifstack.length - 1]; + + if (cond->has_else > 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%else after %%else"); + return; + + } + + cond->has_else++; + + } + + *pp = skip_whitespace (*pp); + + if (!is_end_of_line[(int) **pp]) { + report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%else directive"); + } + + ignore_line = !ignore_line; + + } + +} + +static void handler_endif (char *start, char **pp) { + + struct cond *cond; + + if (!iflevel) { + + if ((cond = vec_pop (&vec_ifstack))) { + + ignore_line = cond->ignore_line; + + free (cond->filename); 
+            free (cond->directive);
+            
+            free (cond);
+        
+        } else {
+            report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%%endif without %%if");
+        }
+        
+        *pp = skip_whitespace (*pp);
+        
+        if (!is_end_of_line[(int) **pp]) {
+            report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, *pp, "extra tokens at end of %%endif directive");
+        }
+    
+    } else {
+        iflevel--;
+    }
+
+}
+
+/* Conditional directives; looked up through hashtab_cond_pseudo_ops. */
+static struct pp_pseudo_op_entry cond_pseudo_op_table[] = {
+
+    {   "if",           &handler_if,        },
+    {   "ifdef",        &handler_ifdef      },
+    {   "ifndef",       &handler_ifndef     },
+    {   "elif",         &handler_elif       },
+    {   "else",         &handler_else       },
+    {   "endif",        &handler_endif      },
+    
+    {   0,              0                   }
+
+};
+
+static struct hashtab hashtab_cond_pseudo_ops = { 0 };
+
+/* Incremented around each nested process_file call made by %include. */
+static int includes = 0;
+
+/* Adds every entry of `table` (terminated by a NULL name) to hashtab_cond_pseudo_ops. */
+static void install_cond_pseudo_op_table (struct pp_pseudo_op_entry *table) {
+
+    struct pp_pseudo_op_entry *entry;
+    struct hashtab_name *key;
+    
+    for (entry = table; entry->name; entry++) {
+    
+        if (hashtab_get_key (&hashtab_cond_pseudo_ops, entry->name)) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name);
+            continue;
+        
+        }
+        
+        if (!(key = hashtab_alloc_name (entry->name))) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name);
+            continue;
+        
+        }
+        
+        hashtab_put (&hashtab_cond_pseudo_ops, key, entry);
+    
+    }
+
+}
+
+/* Returns the conditional directive called `name`, or 0 when there is none. */
+static struct pp_pseudo_op_entry *find_cond_directive (char *name) {
+
+    struct hashtab_name *key;
+    struct pp_pseudo_op_entry *entry;
+    
+    if ((key = hashtab_get_key (&hashtab_cond_pseudo_ops, name))) {
+    
+        if ((entry = hashtab_get (&hashtab_cond_pseudo_ops, key))) {
+            return entry;
+        }
+    
+    }
+    
+    return 0;
+
+}
+
+
+static struct hashtab hashtab_pseudo_ops = { 0 };
+
+static void handler_define (char *start, char **pp) {
+    add_macro (start, pp, 1);
+}
+
+/* %error: reports the rest of the line as an error. */
+static void handler_error (char *start, char **pp) {
+
+    unsigned long len = strlen (*pp);
+    char *temp, *type = "error";
+    
+    /* With an empty remainder len - 1 would wrap and index out of bounds. */
+    if (len > 0 && (*pp)[len - 1] == '\n') {
+        (*pp)[len - 1] = '\0';
+    }
+    
+    temp = xmalloc (1 + strlen (type) + 1 + strlen (*pp) + 1);
+    sprintf (temp, "#%s %s", type, *pp);
+    
+    report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, skip_whitespace (start + 1), "%s", temp);
+    free (temp);
+
+}
+
+/*
+ * %include "FILE" or %include <FILE>.
+ *
+ * A quoted name is first tried as given.  Otherwise, or when that fails,
+ * each include path is tried from the most recently added one, with the
+ * name appended directly to the path (no separator is inserted).  After
+ * the included file has been processed, the file name, line number and
+ * the builtin __FILE__/__LINE__ macros are restored.
+ */
+static void handler_include (char *start, char **pp) {
+
+    const char *orig_fn = get_filename ();
+    unsigned long orig_ln = get_line_number ();
+    
+    char *caret, *sname, ch;
+    int i;
+    
+    char *inc_path, *tmp;
+    FILE *fp;
+    
+    struct hashtab_name *key;
+    struct macro *m;
+    
+    if (**pp != '"' && **pp != '<') {
+    
+        report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, *pp, "%%include expects \"FILENAME\" or <FILENAME>");
+        return;
+    
+    }
+    
+    ch = (**pp == '"' ? '"' : '>');
+    caret = (*pp)++;
+    
+    while (!is_end_of_line[(int) **pp]) {
+    
+        if (**pp == ch) { break; }
+        (*pp)++;
+    
+    }
+    
+    if (**pp != ch) {
+    
+        report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, caret, "%%include expects \"FILENAME\" or <FILENAME>");
+        return;
+    
+    } else {
+        (*pp)++;
+    }
+    
+    /* Text between the delimiters. */
+    sname = xstrndup (caret + 1, *pp - caret - 2);
+    
+    if (ch == '"' && (fp = fopen (sname, "r"))) {
+    
+        fclose (fp);
+        
+        includes++;
+        process_file (sname);
+        
+        includes--;
+        goto end;
+    
+    }
+    
+    for (i = vec_include_paths.length - 1; i >= 0; i--) {
+    
+        inc_path = vec_include_paths.data[i];
+        
+        tmp = xmalloc (strlen (inc_path) + strlen (sname) + 1);
+        sprintf (tmp, "%s%s", inc_path, sname);
+        
+        if ((fp = fopen (tmp, "r"))) {
+        
+            fclose (fp);
+            includes++;
+            
+            process_file (tmp);
+            includes--;
+            
+            free (tmp);
+            goto end;
+        
+        }
+        
+        free (tmp);
+    
+    }
+    
+    report_line_at (orig_fn, orig_ln, REPORT_ERROR, start, caret, "failed to open '%s' for reading", sname);
+    
+end:
+    
+    set_filename_and_line_number (orig_fn, orig_ln);
+    
+    if ((key = find_macro ("__FILE__"))) {
+    
+        if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+        
+            free (m->value);
+            
+            m->value = xmalloc (1 + strlen (orig_fn) + 2);
+            sprintf (m->value, "\"%s\"", orig_fn);
+        
+        }
+    
+    }
+    
+    if ((key = find_macro ("__LINE__"))) {
+    
+        if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) {
+        
+            free (m->value);
+            
+            m->value = xmalloc (23);
+            sprintf (m->value, "%lu", orig_ln);
+        
+        }
+    
+    }
+    
+    free (sname);
+
+}
+
+static void handler_undef (char *start, char **pp) {
+    remove_macro (start, pp, 1);
+}
+
+/* %warning: reports the rest of the line as a warning. */
+static void handler_warning (char *start, char **pp) {
+
+    unsigned long len = strlen (*pp);
+    char *temp, *type = "warning";
+    
+    /* With an empty remainder len - 1 would wrap and index out of bounds. */
+    if (len > 0 && (*pp)[len - 1] == '\n') {
+        (*pp)[len - 1] = '\0';
+    }
+    
+    temp = xmalloc (1 + strlen (type) + 1 + strlen (*pp) + 1);
+    sprintf (temp, "#%s %s", type, *pp);
+    
+    report_line_at (get_filename (), get_line_number (), REPORT_WARNING, start, skip_whitespace (start + 1), "%s", temp);
+    free (temp);
+
+}
+
+/* Non-conditional directives; looked up through hashtab_pseudo_ops. */
+static struct pp_pseudo_op_entry pseudo_op_table[] = {
+
+    {   "define",       &handler_define     },
+    {   "error",        &handler_error      },
+    {   "include",      &handler_include    },
+    {   "undef",        &handler_undef      },
+    {   "warning",      &handler_warning    },
+    
+    {   0,              0                   }
+
+};
+
+/* Adds every entry of `table` (terminated by a NULL name) to hashtab_pseudo_ops. */
+static void install_pp_pseudo_op_table (struct pp_pseudo_op_entry *table) {
+
+    struct pp_pseudo_op_entry *entry;
+    struct hashtab_name *key;
+    
+    for (entry = table; entry->name; entry++) {
+    
+        if (hashtab_get_key (&hashtab_pseudo_ops, entry->name)) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "duplicate entry '%s'", entry->name);
+            continue;
+        
+        }
+        
+        if (!(key = hashtab_alloc_name (entry->name))) {
+        
+            report_at (program_name, 0, REPORT_ERROR, "failed to allocate memory for '%s'", entry->name);
+            continue;
+        
+        }
+        
+        hashtab_put (&hashtab_pseudo_ops, key, entry);
+    
+    }
+
+}
+
+/* Returns the non-conditional directive called `name`, or 0 when there is none. */
+struct pp_pseudo_op_entry *find_directive (char *name) {
+
+    struct hashtab_name *key;
+    struct pp_pseudo_op_entry *entry;
+    
+    if ((key = hashtab_get_key (&hashtab_pseudo_ops, name))) {
+    
+        if ((entry = hashtab_get (&hashtab_pseudo_ops, key))) {
+            return entry;
+        }
+    
+    }
+    
+    return 0;
+
+}
+
+
+/*
+ * Defines __FILE__ and __LINE__ with empty values and __SASM__ as "1".
+ * All values are heap-allocated: handler_include frees the value of a
+ * builtin macro before replacing it, so a string literal must never be
+ * stored here.
+ */
+static void init_builtin_macros (void) {
+
+    static char *builtins[] = { "__FILE__", "__LINE__" };
+    char *name;
+    
+    struct hashtab_name *key;
+    struct macro *m;
+    
+    unsigned long cnt = (sizeof (builtins) / sizeof (*builtins));
+    unsigned i;
+    
+    for (i = 0; i < cnt; i++) {
+    
+        name = xstrdup (builtins[i]);
+        
+        if ((key = hashtab_alloc_name (name))) {
+        
+            m = xmalloc (sizeof (*m));
+            m->type = MACRO_BUILTIN;
+            
+            m->name = name;
+            m->value = xstrdup ("");
+            
+            push_macro (key, m);
+        
+        }
+    
+    }
+    
+    name = xstrdup ("__SASM__");
+    
+    if ((key = hashtab_alloc_name (name))) {
+    
+        m = xmalloc (sizeof (*m));
+        m->type = MACRO_BUILTIN;
+        
+        m->name = name;
+        m->value = xstrdup ("1");
+        
+        push_macro (key, m);
+    
+    }
+
+}
+
+/*
+ * Defines __TIME__ and __DATE__ as quoted strings cut out of the text
+ * returned by ctime(): the time at offset 11, the month at 4, the day at
+ * 8 and the year at 20.  A day below 10 is emitted as a single digit.
+ */
+static void init_date_time_macros (void) {
+
+    char *timep, *buf, *name, temp[3] = { 0 };
+    time_t now;
+    
+    struct hashtab_name *key;
+    struct macro *m;
+    
+    time (&now);
+    timep = ctime (&now);
+    
+    name = xstrdup ("__TIME__");
+    
+    if ((key = hashtab_alloc_name (name))) {
+    
+        /* Two quotes, eight characters and the terminator. */
+        buf = xmalloc (11);
+        sprintf (buf, "\"%.8s\"", timep + 11);
+        
+        m = xmalloc (sizeof (*m));
+        m->type = MACRO_BUILTIN;
+        
+        m->name = name;
+        m->value = buf;
+        
+        push_macro (key, m);
+    
+    }
+    
+    name = xstrdup ("__DATE__");
+    
+    if ((key = hashtab_alloc_name (name))) {
+    
+        sprintf (temp, "%.2s", timep + 8);
+        buf = xmalloc (14);
+        
+        if (atoi (temp) < 10) {
+            sprintf (buf, "\"%.3s %.1s %.4s\"", timep + 4, timep + 9, timep + 20);
+        } else {
+            sprintf (buf, "\"%.3s %.2s %.4s\"", timep + 4, timep + 8, timep + 20);
+        }
+        
+        m = xmalloc (sizeof (*m));
+        m->type = MACRO_BUILTIN;
+        
+        m->name = name;
+        m->value = buf;
+        
+        push_macro (key, m);
+    
+    }
+
+}
+
+int preprocess_init (void) {
+
+    struct list *item;
+    char *opt, *nopt, *p;
+    
+    set_filename (xstrdup (""));
+    set_line_number (1);
+    
+    remove_all_macros ();
+    
+    while ((opt = vec_pop (&vec_include_paths))) {
+        free (opt);
+    }
+    
+    init_builtin_macros ();
+    init_date_time_macros ();
+    
+    if (state->pplist) {
+    
+        item = state->pplist;
+        
+        do {
+        
+            item = item->next;
+            
+            if (!(opt = item->data)) {
+                continue;
+            }
+            
+            if (opt[0] != '-') {
+            
+                report_at (program_name, 0, REPORT_ERROR, "unrecognised 
option '%s'", opt);
+                continue;
+            
+            }
+            
+            switch (opt[1]) {
+            
+                case 'D':
+                
+                    opt = nopt = xstrdup (opt + 2);
+                    
+                    /*
+                     * Turn NAME=VALUE into "NAME VALUE" for add_macro.
+                     * Split at the first '=' so that a value which itself
+                     * contains '=' is kept intact.
+                     */
+                    if ((p = strchr (nopt, '='))) {
+                        *p = ' ';
+                    }
+                    
+                    add_macro (nopt, &nopt, 0);
+                    free (opt);
+                    
+                    break;
+                
+                case 'I':
+                
+                    vec_push (&vec_include_paths, xstrdup (opt + 2));
+                    break;
+                
+                case 'U':
+                
+                    opt = nopt = xstrdup (opt + 2);
+                    
+                    remove_macro (nopt, &nopt, 0);
+                    free (opt);
+                    
+                    break;
+                
+                default:
+                
+                    report_at (program_name, 0, REPORT_ERROR, "unrecognised option '%s'", opt);
+                    break;
+            
+            }
+        
+        } while (item != state->pplist);
+    
+    }
+    
+    install_cond_pseudo_op_table (cond_pseudo_op_table);
+    install_pp_pseudo_op_table (pseudo_op_table);
+    
+    return get_error_count () > 0;
+
+}
+
+
+/*
+ * Returns a newly allocated copy of `src` with macros expanded.
+ *
+ * Runs of blanks are copied as spaces, quoted strings are copied verbatim,
+ * identifiers that name a macro are replaced by their recursively
+ * preprocessed expansion, and @DataSize / @Model are replaced by a single
+ * digit.  `in_macro` is non-zero for the recursive calls on an expansion;
+ * only the outermost call appends a newline, and only when state->ofp is
+ * set.  The caller owns the returned string.
+ */
+static char *preprocess_line (char *src, int in_macro) {
+
+    struct cstring cstr;
+    char *line;
+    
+    char *caret = src, *start;
+    char *sname, ch;
+    
+    struct hashtab_name *key;
+    struct macro *m;
+    
+    cstr_new (&cstr);
+    
+    while (!is_end_of_line[(int) *caret]) {
+    
+        start = caret;
+        
+        if (*caret == ' ' || *caret == '\t') {
+        
+            while (*caret == ' ' || *caret == '\t') {
+            
+                cstr_ccat (&cstr, ' ');
+                caret++;
+            
+            }
+            
+            continue;
+        
+        }
+        
+        if (*caret == '"' || *caret == '\'') {
+        
+            ch = *caret++;
+            
+            while (!is_end_of_line[(int) *caret]) {
+            
+                if (*caret == '\\') {
+                
+                    caret++;
+                    
+                    if (!is_end_of_line[(int) *caret]) {
+                        caret++;
+                    }
+                    
+                    continue;
+                
+                }
+                
+                if (*caret == ch) { break; }
+                caret++;
+            
+            }
+            
+            if (*caret != ch) {
+            
+                /* Unterminated literal: warn and supply the closing quote. */
+                char *temp = xmalloc ((caret - start) + 2);
+                sprintf (temp, "%.*s%c", (int) (caret - start), start, ch);
+                
+                report_line_at (get_filename (), get_line_number (), REPORT_WARNING, src, start, "missing terminating %c character", ch);
+                cstr_cat (&cstr, temp, strlen (temp));
+                
+                free (temp);
+                continue;
+            
+            }
+            
+            caret++;
+            
+            cstr_cat (&cstr, start, caret - start);
+            continue;
+        
+        }
+        
+        if (is_name_beginner ((int) *caret)) {
+        
+            sname = symname (&caret);
+            
+            if ((key = find_macro (sname))) {
+            
+                if ((m = get_macro (key))) {
+                
+                    char *pm;
+                    int spaces = 0;
+                    
+                    if (*caret == ' ' || *caret == '\t') {
+                    
+                        cstr_ccat (&cstr, ' ');
+                        
+                        while (*caret == ' ' || *caret == '\t') {
+                        
+                            spaces++;
+                            caret++;
+                        
+                        }
+                    
+                    }
+                    
+                    if ((pm = process_macro (start, &caret, m))) {
+                    
+                        char *temp = preprocess_line (pm, 1);
+                        cstr_cat (&cstr, temp, strlen (temp));
+                        
+                        /* The recursive call returns an allocated copy. */
+                        free (temp);
+                        
+                        if (!is_end_of_line[(int) *pm]) {
+                        
+                            while (spaces--) {
+                                cstr_ccat (&cstr, ' ');
+                            }
+                        
+                        }
+                    
+                    }
+                
+                }
+                
+                free (sname);
+                continue;
+            
+            }
+            
+            cstr_cat (&cstr, start, caret - start);
+            
+            free (sname);
+            continue;
+        
+        }
+        
+        if (*caret == '@') {
+        
+            char *arg, *temp = (caret + 1);
+            
+            if ((arg = symname (&temp))) {
+            
+                if (xstrcasecmp (arg, "DataSize") == 0) {
+                
+                    caret = temp;
+                    free (arg);
+                    
+                    cstr_ccat (&cstr, state->data_size | 0x30);
+                    continue;
+                
+                }
+                
+                if (xstrcasecmp (arg, "Model") == 0) {
+                
+                    caret = temp;
+                    free (arg);
+                    
+                    cstr_ccat (&cstr, (state->model > 0 ? state->model | 0x30 : '1'));
+                    continue;
+                
+                }
+                
+                free (arg);
+            
+            }
+        
+        }
+        
+        if (isdigit ((unsigned char) *caret)) {
+        
+            if (caret[0] == '0' && tolower ((unsigned char) caret[1]) == 'x') {
+            
+                /*
+                 * Consume the hexadecimal digits as part of the number so
+                 * that the letters a-f are not re-read as an identifier
+                 * and macro-expanded.
+                 */
+                caret += 2;
+                
+                while (isxdigit ((unsigned char) *caret)) {
+                    caret++;
+                }
+            
+            } else {
+            
+                while (isdigit ((unsigned char) *caret)) {
+                    caret++;
+                }
+            
+            }
+            
+            cstr_cat (&cstr, start, caret - start);
+            continue;
+        
+        }
+        
+        if (ispunct ((unsigned char) *caret)) {
+        
+            cstr_ccat (&cstr, *caret);
+            
+            caret++;
+            continue;
+        
+        }
+        
+        report_line_at (get_filename (), get_line_number (), REPORT_INTERNAL_ERROR, src, caret, "Do we see this???");
+        caret++;
+    
+    }
+    
+    if (!in_macro && state->ofp) {
+        cstr_ccat (&cstr, '\n');
+    }
+    
+    cstr_ccat (&cstr, '\0');
+    
+    line = xstrdup (cstr.data);
+    cstr_free (&cstr);
+    
+    return line;
+
+}
+
+struct section *machine_dependent_simplified_expression_read_into (char *start, char **pp, struct expr *expr);
+
+static struct section *get_known_section_expression (char *start, char **pp, struct expr *expr) {
+
+    struct section *section = machine_dependent_simplified_expression_read_into (start, pp, expr);
+    
+    if (expr->type == EXPR_TYPE_INVALID || expr->type == EXPR_TYPE_ABSENT) {
+    
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "expected 
address expression");
+        
+        expr->type = EXPR_TYPE_CONSTANT;
+        expr->add_number = 0;
+        
+        section = absolute_section;
+    
+    }
+    
+    if (section == undefined_section) {
+    
+        if (expr->add_symbol && symbol_get_section (expr->add_symbol) != expr_section) {
+            report_at (get_filename (), get_line_number (), REPORT_WARNING, "symbol \"%s\" undefined; zero assumed", symbol_get_name (expr->add_symbol));
+        } else {
+            report_at (get_filename (), get_line_number (), REPORT_WARNING, "some symbol undefined; zero assumed");
+        }
+        
+        expr->type = EXPR_TYPE_CONSTANT;
+        expr->add_number = 0;
+        
+        section = absolute_section;
+    
+    }
+    
+    return section;
+
+}
+
+/*
+ * Emits an "org" variant fragment that moves the location counter to the
+ * address described by `expr`.  One byte holding `fill_value` is allocated
+ * in the current fragment.  An expression that is neither a constant nor a
+ * plain symbol is first wrapped in an expression symbol.
+ */
+static void do_org (struct section *section, struct expr *expr, unsigned long fill_value) {
+
+    struct symbol *symbol;
+    
+    unsigned char *p_in_frag;
+    unsigned long offset;
+    
+    if (section != current_section && section != absolute_section && section != expr_section) {
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid section \"%s\"", section_get_name (section));
+    }
+    
+    symbol = expr->add_symbol;
+    offset = expr->add_number;
+    
+    if (fill_value && current_section == bss_section) {
+        report_at (get_filename (), get_line_number (), REPORT_WARNING, "ignoring fill value in section \"%s\"", section_get_name (current_section));
+    }
+    
+    if (expr->type != EXPR_TYPE_CONSTANT && expr->type != EXPR_TYPE_SYMBOL) {
+    
+        symbol = make_expr_symbol (expr);
+        offset = 0;
+    
+    }
+    
+    *(p_in_frag = frag_alloc_space (1)) = (unsigned char) fill_value;
+    frag_set_as_variant (RELAX_TYPE_ORG, 0, symbol, offset, 0);
+
+}
+
+/*
+ * Reads an expression and assigns it to `symbol`.  A constant, or an
+ * invalid or missing expression (which is reported and treated as 0),
+ * makes the symbol absolute; anything else is stored as a value
+ * expression in expr_section.  Section symbols cannot be assigned to.
+ */
+static void internal_set (char *start, char **pp, struct symbol *symbol) {
+
+    struct expr expr;
+    machine_dependent_simplified_expression_read_into (start, pp, &expr);
+    
+    if (expr.type == EXPR_TYPE_INVALID) {
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "invalid expression");
+    } else if (expr.type == EXPR_TYPE_ABSENT) {
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "missing expression");
+    }
+    
+    if (symbol_is_section_symbol (symbol)) {
+    
+        report_at (get_filename (), get_line_number (), REPORT_ERROR, "attempt to set value of section symbol");
+        return;
+    
+    }
+    
+    switch (expr.type) {
+    
+        case EXPR_TYPE_INVALID:
+        case EXPR_TYPE_ABSENT:
+        
+            expr.add_number = 0;
+            /* fall through */
+        
+        case EXPR_TYPE_CONSTANT:
+        
+            symbol_set_frag (symbol, &zero_address_frag);
+            symbol_set_section (symbol, absolute_section);
+            symbol_set_value (symbol, expr.add_number);
+            
+            break;
+        
+        default:
+        
+            symbol_set_frag (symbol, &zero_address_frag);
+            symbol_set_section (symbol, expr_section);
+            symbol_set_value_expression (symbol, &expr);
+            
+            break;
+    
+    }
+
+}
+
+/* Handles "name equ expr".  The name "." sets the location counter instead. */
+static void assign_symbol (char *start, char **pp, char *name) {
+
+    struct symbol *symbol;
+    
+    if (name[0] == '.' && name[1] == '\0') {
+    
+        struct section *section;
+        struct expr expr;
+        
+        section = get_known_section_expression (start, pp, &expr);
+        do_org (section, &expr, 0);
+        
+        return;
+    
+    }
+    
+    symbol = symbol_find_or_make (name, SYMBOL_SCOPE_LOCAL);
+    internal_set (start, pp, symbol);
+
+}
+
+/*
+ * Handles the "org" directive.  A fill value after a comma is not
+ * implemented: the comma is reported as an internal error and skipped,
+ * and the fill value stays 0.
+ */
+static void handle_org (char *start, char **pp) {
+
+    struct expr expr;
+    
+    struct section *section = get_known_section_expression (start, pp, &expr);
+    unsigned long fill_value = 0;
+    
+    if (**pp == ',') {
+    
+        report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "+++handle_org");
+        (*pp)++;
+    
+    }
+    
+    do_org (section, &expr, fill_value);
+
+}
+
+extern void machine_dependent_assemble_line (char *start, char *line);
+extern void machine_dependent_handle_proc (char *start, char **pp, char *name);
+extern void machine_dependent_handle_endp (char *start, char **pp, char *name);
+
+/*
+ * Returns a pointer to the first end-of-line character in `line` that is
+ * not inside a double-quoted string.  A backslash inside a string escapes
+ * the following character.  An unterminated string runs to the NUL.
+ */
+static char *find_end_of_line (char *line) {
+
+    while (!is_end_of_line[(int) *line]) {
+    
+        if (line++[0] == '\"') {
+        
+            while (*line && *line != '\"') {
+            
+                if (line++[0] == '\\' && *line) {
+                    line++;
+                }
+            
+            }
+            
+            /*
+             * Step over the closing quote.  Left in place it would be
+             * read as the start of another string by the outer loop, so
+             * everything after the string would be skipped as string
+             * content.
+             */
+            if (*line == '\"') {
+                line++;
+            }
+        
+        }
+    
+    }
+    
+    return line;
+
+}
+
+extern void *machine_dependent_find_templates (char *name, int check_suffix);
+extern 
void *machine_dependent_find_reg_entry (char *name); + +static void process_line (char *line, char *line_end) { + + char *start = line, *caret; + char *arg = 0, saved_ch; + + struct pseudo_op_entry *poe; + int has_colon; + + caret = (line = skip_whitespace (line)); + + if (caret >= line_end) { + return; + } + + while (line < line_end) { + + has_colon = 0; + + if (is_name_beginner ((int) *line)) { + + caret = line; + + if (!(arg = symname (&line))) { + goto check; + } + + if (xstrcasecmp (arg, "equ") == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "equ not preceded by label"); + + ignore_rest_of_line (&line); + goto check; + + } + + if (xstrcasecmp (arg, "org") == 0) { + + line = skip_whitespace (line); + + handle_org (start, &line); + goto check; + + } + + if (xstrcasecmp (arg, "proc") == 0 || xstrcasecmp (arg, "endp") == 0) { + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "procedure must have a name"); + + ignore_rest_of_line (&line); + goto check; + + } + + line = skip_whitespace (line); + + if ((poe = find_poe (arg))) { + + poe->handler (start, &line); + goto check; + + } + + if (machine_dependent_find_templates (arg, 1)) { + + saved_ch = *(line = find_end_of_line (line)); + *line = '\0'; + + machine_dependent_assemble_line (start, skip_whitespace (start)); + *line = saved_ch; + + goto check; + + } + + if (!machine_dependent_find_reg_entry (arg)) { + + char *temp, *directive; + struct symbol *symbol; + + if (*line == ':' || is_end_of_line[(int) *line]) { + + if ((has_colon = (*line == ':'))) { + + temp = (line = skip_whitespace (line + 1)); + + if ((directive = symname (&line))) { + + if (xstrcasecmp (directive, "equ") == 0) { + + assign_symbol (start, &line, arg); + + free (directive); + goto check; + + } + + free (directive); + + } + + line = temp; + + } + + symbol = symbol_label (start, caret, arg); + symbol->scope = SYMBOL_SCOPE_LOCAL; + + if (!has_colon) { + report_line_at 
(get_filename (), get_line_number (), REPORT_WARNING, start, caret, "label alone without colon"); + } + + free (arg); + continue; + + } + + temp = line; + + if ((directive = symname (&line))) { + + if ((poe = find_data_poe (directive))) { + + free (directive); + + symbol = symbol_label (start, caret, arg); + symbol->scope = SYMBOL_SCOPE_LOCAL; + + poe->handler (start, &line); + goto check; + + } + + if (xstrcasecmp (directive, "equ") == 0) { + + assign_symbol (start, &line, arg); + + free (directive); + goto check; + + } + + if (xstrcasecmp (directive, "proc") == 0) { + + machine_dependent_handle_proc (start, &line, arg); + + free (directive); + goto check; + + } + + if (xstrcasecmp (directive, "endp") == 0) { + + machine_dependent_handle_endp (start, &line, arg); + + free (directive); + goto check; + + } + + free (directive); + + } + + line = temp; + + } + + line = caret; + + } + + check: + + if (arg) { free (arg); } + arg = 0; + + line = skip_whitespace (line); + + if (is_end_of_line[(int) *line]) { + break; + } + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, line, "junk '%c' at the end of line", *line); + ignore_rest_of_line (&line); + + } + +} + +void process_file (const char *ifile) { + + char *start, *arg, *caret; + FILE *fp; + + char *line, *line_end, *real_line; + unsigned long real_line_len; + + unsigned long newlines; + unsigned long new_line_number; + + struct pp_pseudo_op_entry *poe; + void *load_line_internal_data = NULL; + + struct cond *cond; + int cond_idx, proc_idx; + + struct hashtab_name *key; + struct macro *m; + + if (!ifile || strcmp (ifile, "-") == 0) { + + set_filename (xstrdup ("")); + fp = stdin; + + } else { + + set_filename (xstrdup (ifile)); + + if (!(fp = fopen (ifile, "r"))) { + + report_at (program_name, 0, REPORT_FATAL_ERROR, "Failed to open '%s' for reading", ifile); + return; + + } + + } + + set_line_number (0); + new_line_number = 1; + + if ((key = find_macro ("__FILE__"))) { + + const char *filename 
= get_filename ();; + + if (filename && (m = get_macro (key)) && m->type == MACRO_BUILTIN) { + + free (m->value); + + m->value = xmalloc (1 + strlen (filename) + 2); + sprintf (m->value, "\"%s\"", filename); + + } + + } + + load_line_internal_data = load_line_create_internal_data (&new_line_number); + + while (!load_line (&line, &line_end, &real_line, &real_line_len, &newlines, fp, &load_line_internal_data)) { + + set_line_number (new_line_number); + new_line_number += newlines + 1; + + if (state->lfile) { + + update_listing_line (current_frag); + add_listing_line (real_line, real_line_len, get_filename (), get_line_number ()); + + } + + if ((key = find_macro ("__LINE__"))) { + + if ((m = get_macro (key)) && m->type == MACRO_BUILTIN) { + + free (m->value); + + m->value = xmalloc (23); + sprintf (m->value, "%lu", get_line_number ()); + + } + + } + + start = line; + caret = (line = skip_whitespace (line)); + + if (!ignore_line && line >= line_end) { + continue; + } + + if (*line == '%') { + + caret = (line = skip_whitespace (line + 1)); + + if (is_name_beginner ((int) *line)) { + + while (is_name_part ((int) *line)) { + line++; + } + + arg = xstrndup (caret, line - caret); + line = skip_whitespace (line); + + if ((poe = find_cond_directive (arg))) { + + poe->handler (start, &line); + free (arg); + + continue; + + } + + if (!ignore_line) { + + if ((poe = find_directive (arg))) { + + poe->handler (start, &line); + free (arg); + + continue; + + } + + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "unknown preprocessor directive '%%%s'", arg); + + } + + free (arg); + continue; + + } + + if (!is_end_of_line[(int) *caret] && !ignore_line) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "unknown preprocessor directive '%%%c'", *caret); + } + + continue; + + } + + if (is_name_beginner ((int) *line)) { + + if ((arg = symname (&line))) { + + line = skip_whitespace (line); + + if ((poe = find_cond_directive 
(arg))) { + + poe->handler (start, &line); + free (arg); + + continue; + + } + + } + + line = caret; + + } + + if (!ignore_line) { + + if (line < line_end) { + + char *tokenized_line = preprocess_line (line, 0); + + process_line (tokenized_line, tokenized_line + strlen (tokenized_line)); + free (tokenized_line); + + } + + } + + } + + if (state->lfile) { + update_listing_line (current_frag); + } + + load_line_destroy_internal_data (load_line_internal_data); + + if (!includes) { + + for (cond_idx = 0; cond_idx < vec_ifstack.length; cond_idx++) { + + cond = vec_ifstack.data[cond_idx]; + report_at (cond->filename, cond->line_number, REPORT_ERROR, "unterminated %%%s statement", cond->directive); + + free (cond->filename); + free (cond->directive); + + free (cond); + + } + + for (proc_idx = 0; proc_idx < state->procs.length; proc_idx++) { + + struct proc *proc = (struct proc *) state->procs.data[proc_idx]; + report_at (proc->filename, proc->line_number, REPORT_ERROR, "procedure %s is not closed", proc->name); + + } + + } + + if (fp != stdin) { fclose (fp); } + +} diff --git a/process.h b/process.h new file mode 100644 index 0000000..a75313a --- /dev/null +++ b/process.h @@ -0,0 +1,10 @@ +/****************************************************************************** + * @file process.h + *****************************************************************************/ +#ifndef _PROCESS_H +#define _PROCESS_H + +int preprocess_init (void); +void process_file (const char *ifile); + +#endif /* _PROCESS_H */ diff --git a/report.c b/report.c new file mode 100644 index 0000000..e78065b --- /dev/null +++ b/report.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * @file report.c + *****************************************************************************/ +#include +#include +#include + +#include "report.h" +unsigned int errors = 0; + +#ifndef __PDOS__ +#if defined (_WIN32) +# include +static int OriginalConsoleColor = -1; 
+#endif
+
+/**
+ * Undo the colour change made by set_console_color.
+ *
+ * Windows: restores the console attributes saved by set_console_color and
+ * forgets them; does nothing when no attributes are currently saved.
+ * Elsewhere: writes the "\033[0m" escape sequence to stderr.
+ */
+static void reset_console_color (void) {
+
+#if defined (_WIN32)
+
+    HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE);
+
+    if (OriginalConsoleColor == -1) { return; }
+
+    SetConsoleTextAttribute (hStdError, OriginalConsoleColor);
+    OriginalConsoleColor = -1;
+
+#else
+
+    fprintf (stderr, "\033[0m");
+
+#endif
+
+}
+
+/**
+ * Switch the colour used for text written to stderr.
+ *
+ * Windows: on first use the current console attributes are saved in
+ * OriginalConsoleColor (if they cannot be queried the colour is left alone).
+ * The low four bits of the attribute are then replaced by the low four bits
+ * of 'color'; the upper bits of the saved attribute are kept.
+ * Elsewhere: writes the "\033[<color>m" escape sequence to stderr.
+ *
+ * @param color One of the COLOR_* values.
+ */
+static void set_console_color (int color) {
+
+#if defined (_WIN32)
+
+    HANDLE hStdError = GetStdHandle (STD_ERROR_HANDLE);
+    WORD wColor;
+
+    if (OriginalConsoleColor == -1) {
+
+        CONSOLE_SCREEN_BUFFER_INFO csbi;
+
+        if (!GetConsoleScreenBufferInfo (hStdError, &csbi)) {
+            return;
+        }
+
+        OriginalConsoleColor = csbi.wAttributes;
+
+    }
+
+    wColor = (OriginalConsoleColor & 0xF0) + (color & 0xF);
+    SetConsoleTextAttribute (hStdError, wColor);
+
+#else
+
+    fprintf (stderr, "\033[%dm", color);
+
+#endif
+
+}
+#endif
+
+/**
+ * Write one diagnostic to stderr and update the error counter.
+ *
+ * The output is "<filename>:<lineno>:<idx>: <severity>: <message>\n", where
+ * the filename is omitted when NULL and lineno/idx are omitted when zero.
+ * The severity label is printed for REPORT_ERROR, REPORT_FATAL_ERROR,
+ * REPORT_INTERNAL_ERROR and REPORT_WARNING; any other type prints no label.
+ *
+ * @param filename File name to prefix, or NULL for none.
+ * @param lineno   Line number to prefix, or 0 for none.
+ * @param idx      Column number to prefix, or 0 for none.
+ * @param type     One of the REPORT_* values.
+ * @param fmt      printf-style format of the message.
+ * @param ap       Arguments consumed by 'fmt'.
+ */
+static void output_message (const char *filename, unsigned int lineno, unsigned int idx, int type, const char *fmt, va_list ap) {
+
+    if (filename) {
+
+        /* The trailing space goes after the last component of the prefix only. */
+        if (lineno == 0) {
+            fprintf (stderr, "%s: ", filename);
+        } else {
+            fprintf (stderr, "%s:", filename);
+        }
+
+    }
+
+    if (lineno > 0) {
+
+        if (idx == 0) {
+            fprintf (stderr, "%u: ", lineno);
+        } else {
+            fprintf (stderr, "%u:", lineno);
+        }
+
+    }
+
+    if (idx > 0) {
+        fprintf (stderr, "%u: ", idx);
+    }
+
+    if (type == REPORT_ERROR || type == REPORT_FATAL_ERROR) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_ERROR);
+#endif
+
+        if (type == REPORT_ERROR) {
+            fprintf (stderr, "error:");
+        } else {
+            fprintf (stderr, "fatal error:");
+        }
+
+    } else if (type == REPORT_INTERNAL_ERROR) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_INTERNAL_ERROR);
+#endif
+
+        fprintf (stderr, "internal error:");
+
+    } else if (type == REPORT_WARNING) {
+
+#ifndef __PDOS__
+        set_console_color (COLOR_WARNING);
+#endif
+
+        fprintf (stderr, "warning:");
+
+    }
+
+    /* Only the severity label is coloured; the message itself is not. */
+#ifndef __PDOS__
+    reset_console_color ();
+#endif
+
+    fprintf (stderr, " ");
+    vfprintf (stderr, fmt, ap);
+    fprintf (stderr, "\n");
+
+    /* Everything except a warning counts as an error. */
+    if (type != REPORT_WARNING) {
+        ++errors;
+    }
+
+}
+
+/**
+ * @return Number of non-warning diagnostics reported so far.
+ */
+unsigned int get_error_count (void) {
+    return errors;
+}
+
+/**
+ * Report a diagnostic that has no column information.
+ *
+ * @param filename File name to prefix, or NULL for none.
+ * @param lineno   Line number to prefix, or 0 for none.
+ * @param type     One of the REPORT_* values.
+ * @param fmt      printf-style format followed by its arguments.
+ */
+void report_at (const char *filename, unsigned int lineno, int type, const char *fmt, ...) {
+
+    va_list ap;
+
+    va_start (ap, fmt);
+    output_message (filename, lineno, 0, type, fmt, ap);
+    va_end (ap);
+
+}
+
+/**
+ * Report a diagnostic and, when both 'str' and 'caret' are given, echo the
+ * offending source line followed by a '^' marker under the character that
+ * 'caret' points at.
+ *
+ * @param filename File name to prefix, or NULL for none.
+ * @param lineno   Line number to prefix and to show in the gutter, or 0.
+ * @param type     One of the REPORT_* values.
+ * @param str      Start of the source line, or NULL to skip the echo.
+ * @param caret    Position inside 'str' to point at, or NULL to skip the echo.
+ * @param fmt      printf-style format followed by its arguments.
+ */
+void report_line_at (const char *filename, unsigned int lineno, int type, const char *str, const char *caret, const char *fmt, ...) {
+
+    int ident = 1;
+    va_list ap;
+
+    /* The reported column is 1-based; 0 means "no column". */
+    unsigned int idx = 0;
+    if (str && caret) { idx = (caret - str) + 1; }
+
+    va_start (ap, fmt);
+    output_message (filename, lineno, idx, type, fmt, ap);
+    va_end (ap);
+
+    if (str && caret) {
+
+        size_t length;
+
+        /* fprintf returns the gutter width, which is reused to place the marker. */
+        if (lineno > 0) {
+            ident = fprintf (stderr, " %8u | ", lineno);
+        } else {
+            ident = fprintf (stderr, "%*s", 12, "");
+        }
+
+        fprintf (stderr, "%s", str);
+
+        /**
+         * Terminate the echoed line if it does not end in a newline.  An empty
+         * line has no last character, so the length is tested first instead of
+         * reading str[-1].
+         */
+        length = strlen (str);
+
+        if (length == 0 || str[length - 1] != '\n') {
+            fprintf (stderr, "\n");
+        }
+
+        fprintf (stderr, "%*s^\n", (int) (caret - str) + ident, "");
+
+    }
+
+}
diff --git a/report.h b/report.h
new file mode 100644
index 0000000..b7e6e6f
--- /dev/null
+++ b/report.h
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * @file report.h
+ *****************************************************************************/
+#ifndef _REPORT_H
+#define _REPORT_H
+
+#if defined (_WIN32)
+# define COLOR_ERROR 12
+# define COLOR_WARNING 13
+# define COLOR_INTERNAL_ERROR 19
+#else
+# define COLOR_ERROR 91
+# define COLOR_INTERNAL_ERROR 94
+# define COLOR_WARNING 95
+#endif
+
+#define REPORT_WARNING 0
+#define REPORT_ERROR 1
+#define REPORT_FATAL_ERROR 3
+#define REPORT_INTERNAL_ERROR 4
+
+unsigned int get_error_count (void);
+
+void report_at (const char *filename, unsigned int lineno, int type, const char *fmt, ...);
+void report_line_at (const char *filename, unsigned int lineno, int type, const char *str, const char *caret, const char *fmt, ...);
+
+#endif /* _REPORT_H */
diff --git a/section.c b/section.c
new file
mode 100644 index 0000000..8922fa4 --- /dev/null +++ b/section.c @@ -0,0 +1,139 @@ +/****************************************************************************** + * @file section.c + *****************************************************************************/ +#include + +#include "frag.h" +#include "lib.h" +#include "section.h" +#include "symbol.h" + +struct section { + + const char *name; + + struct frag_chain frag_chain; + struct symbol *symbol; + + int alignment_power; + struct section *next; + +}; + +static struct section internal_sections[4]; +static struct symbol section_symbols[4]; + +struct section *undefined_section; +struct section *absolute_section; +struct section *expr_section; +struct section *reg_section; + +struct section *text_section; +struct section *data_section; +struct section *bss_section; + +struct section *current_section; + +struct frag_chain *current_frag_chain = 0; +struct section *sections = 0; + +static struct section *find_or_make_section_by_name (const char *name) { + + struct section *section, **p_next; + + for (p_next = §ions, section = sections; section; p_next = &(section->next), section = *p_next) { + + if (strcmp (name, section->name) == 0) { + break; + } + + } + + if (!section) { + + section = xmalloc (sizeof (*section)); + section->name = xstrdup (name); + + section->symbol = symbol_create (name, section, 0, &zero_address_frag); + section->symbol->flags |= SYMBOL_FLAG_SECTION_SYMBOL; + + symbol_add_to_chain (section->symbol); + *p_next = section; + + } + + return section; + +} + +struct section *section_get_next_section (struct section *section) { + return section->next; +} + +struct section *section_set (struct section *section) { + + current_section = section; + + current_frag_chain = ¤t_section->frag_chain; + current_frag = current_frag_chain->last_frag; + + return section; + +} + +struct section *section_set_by_name (const char *name) { + return section_set (find_or_make_section_by_name (name)); +} + +struct symbol 
*section_symbol (struct section *section) { + return section->symbol; +} + +const char *section_get_name (struct section *section) { + return section->name; +} + +void section_record_alignment_power (struct section *section, int alignment_power) { + + if (alignment_power > section->alignment_power) { + section->alignment_power = alignment_power; + } + +} + + +#define CREATE_INTERNAL_SECTION(section_var, section_name, section_index) \ + (section_var) = &internal_sections[(section_index)]; \ + (section_var)->name = (section_name); \ + (section_var)->symbol = §ion_symbols[(section_index)]; \ + (section_var)->symbol->name = (section_name); \ + (section_var)->symbol->section = (section_var); \ + (section_var)->symbol->frag = &zero_address_frag; \ + symbol_set_value ((section_var)->symbol, 0); \ + (section_var)->symbol->flags |= SYMBOL_FLAG_SECTION_SYMBOL + +void sections_init (void) { + + CREATE_INTERNAL_SECTION (undefined_section, "*UND*", 0); + CREATE_INTERNAL_SECTION (absolute_section, "*ABS*", 1); + CREATE_INTERNAL_SECTION (expr_section, "*EXPR*", 2); + CREATE_INTERNAL_SECTION (reg_section, "*REG*", 3); + + text_section = section_set_by_name (".text"); + text_section->frag_chain.last_frag = text_section->frag_chain.first_frag = frag_alloc (); + text_section->frag_chain.last_fixup = text_section->frag_chain.first_fixup = 0; + + data_section = section_set_by_name (".data"); + data_section->frag_chain.last_frag = data_section->frag_chain.first_frag = frag_alloc (); + data_section->frag_chain.last_fixup = data_section->frag_chain.first_fixup = 0; + + bss_section = section_set_by_name (".bss"); + bss_section->frag_chain.last_frag = bss_section->frag_chain.first_frag = frag_alloc (); + bss_section->frag_chain.last_fixup = bss_section->frag_chain.first_fixup = 0; + + /* .text section is the default section. 
*/ + section_set (text_section); + +} + +#undef CREATE_INTERNAL_SECTION diff --git a/section.h b/section.h new file mode 100644 index 0000000..d4faeaa --- /dev/null +++ b/section.h @@ -0,0 +1,42 @@ +/****************************************************************************** + * @file section.h + *****************************************************************************/ +#ifndef _SECTION_H +#define _SECTION_H + +struct frag_chain { + + struct fixup *first_fixup, *last_fixup; + struct frag *first_frag, *last_frag; + + struct frag_chain *next; + +}; + +#define SECTION_IS_NORMAL(section) \ + ((section != undefined_section) && (section != absolute_section) && (section != expr_section) && (section != reg_section)) + +extern struct section *undefined_section; +extern struct section *absolute_section; +extern struct section *expr_section; +extern struct section *reg_section; + +extern struct section *text_section; +extern struct section *data_section; +extern struct section *bss_section; + +extern struct section *current_section; + +extern struct frag_chain *current_frag_chain; +extern struct section *sections; + +struct section *section_get_next_section (struct section *section); +struct section *section_set (struct section *section); +struct section *section_set_by_name (const char *name); + +struct symbol *section_symbol (struct section *section); +const char *section_get_name (struct section *section); + +void section_record_alignment_power (struct section *section, int alignment_power); + +#endif /* _SECTION_H */ diff --git a/symbol.c b/symbol.c new file mode 100644 index 0000000..9f069d2 --- /dev/null +++ b/symbol.c @@ -0,0 +1,788 @@ +/****************************************************************************** + * @file symbol.c + *****************************************************************************/ +#include +#include + +#include "as.h" +#include "expr.h" +#include "frag.h" +#include "lib.h" +#include "report.h" +#include "section.h" +#include 
"symbol.h" + +static struct symbol **pointer_to_pointer_to_next_symbol = &symbols; + +struct symbol *symbols = 0; +int finalize_symbols = 0; + +static void report_op_error (struct symbol *symbol, struct symbol *left, enum expr_type op, struct symbol *right) { + + const char *op_name = 0; + + struct section *left_section = left ? symbol_get_section (left) : 0; + struct section *right_section = symbol_get_section (right); + + const char *filename; + unsigned long line_number; + + switch (op) { + + case EXPR_TYPE_LOGICAL_OR: + + op_name = "||"; + break; + + case EXPR_TYPE_LOGICAL_AND: + + op_name = "&&"; + break; + + case EXPR_TYPE_EQUAL: + + op_name = "=="; + break; + + case EXPR_TYPE_NOT_EQUAL: + + op_name = "!="; + break; + + case EXPR_TYPE_LESSER: + + op_name = "<"; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + op_name = "<="; + break; + + case EXPR_TYPE_GREATER: + + op_name = ">"; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + op_name = ">="; + break; + + case EXPR_TYPE_ADD: + + op_name = "+"; + break; + + case EXPR_TYPE_SUBTRACT: + + op_name = "-"; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + op_name = "|"; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + op_name = "^"; + break; + + case EXPR_TYPE_BIT_AND: + + op_name = "&"; + break; + + case EXPR_TYPE_MULTIPLY: + + op_name = "*"; + break; + + case EXPR_TYPE_DIVIDE: + + op_name = "/"; + break; + + case EXPR_TYPE_MODULUS: + + op_name = "%"; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + op_name = "<<"; + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + op_name = ">>"; + break; + + case EXPR_TYPE_LOGICAL_NOT: + + op_name = "!"; + break; + + case EXPR_TYPE_BIT_NOT: + + op_name = "~"; + break; + + case EXPR_TYPE_UNARY_MINUS: + + op_name = "-"; + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "report_op_error invalid case %i", op); + exit (EXIT_FAILURE); + + } + + if (expr_symbol_get_filename_and_line_number (symbol, &filename, &line_number) == 0) { + + if (left) { + report_at (filename, 
line_number, REPORT_ERROR, "invalid operands (%s and %s sections) for `%s'", section_get_name (left_section), section_get_name (right_section), op_name); + } else { + report_at (filename, line_number, REPORT_ERROR, "invalid operand (%s section) for `%s'", section_get_name (right_section), op_name); + } + + } else { + + if (left) { + report_at (program_name, 0, REPORT_ERROR, "invalid operands (%s and %s sections) for `%s' when setting `%s'", section_get_name (left_section), section_get_name (right_section), op_name, symbol_get_name (symbol)); + } else { + report_at (program_name, 0, REPORT_ERROR, "invalid operand (%s section) for `%s' when setting `%s'", section_get_name (right_section), op_name, symbol_get_name (symbol)); + } + + } + +} + +struct expr *symbol_get_value_expression (struct symbol *symbol) { + return &(symbol->value); +} + +struct frag *symbol_get_frag (struct symbol *symbol) { + return symbol->frag; +} + +struct section *symbol_get_section (struct symbol *symbol) { + return symbol->section; +} + +struct symbol *symbol_create (const char *name, struct section *section, unsigned long value, struct frag *frag) { + + struct symbol *symbol = xmalloc (sizeof (*symbol)); + + symbol->name = xstrdup (name); + symbol->section = section; + symbol->frag = frag; + + symbol_set_value (symbol, value); + return symbol; + +} + +struct symbol *symbol_find (const char *name) { + + struct symbol *symbol; + + for (symbol = symbols; symbol; symbol = symbol->next) { + + if (strcmp (symbol->name, name) == 0) { + break; + } + + } + + return symbol; + +} + +struct symbol *symbol_find_or_make (char *name, int scope) { + + struct symbol *symbol = symbol_find (name); + + if (!symbol) { + + symbol = symbol_make (name); + symbol->scope = scope; + + symbol_add_to_chain (symbol); + + } + + return symbol; + +} + +struct symbol *symbol_label (char *start, char *caret, char *name) { + + struct symbol *symbol = 0; + + if ((symbol = symbol_find (name))) { + + if (symbol->section == 
undefined_section) { + + symbol->section = current_section; + symbol->frag = current_frag; + + symbol_set_value (symbol, current_frag->fixed_size); + + } else { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } + + } else { + + if (xstrcasecmp (name, "DGROUP") == 0 || strcmp (name, "_end") == 0 || strcmp (name, "_edata") == 0) { + report_line_at (get_filename (), get_line_number (), REPORT_ERROR, start, caret, "symbol '%s' is already defined", name); + } else { + + symbol = symbol_create (name, current_section, current_frag->fixed_size, current_frag); + symbol_add_to_chain (symbol); + + } + + } + + return symbol; + +} + +struct symbol *symbol_make (const char *name) { + return symbol_create (name, undefined_section, 0, &zero_address_frag); +} + +struct symbol *symbol_temp_new_now (void) { + return symbol_create (FAKE_LABEL_NAME, current_section, current_frag->fixed_size, current_frag); +} + +char *symbol_get_name (struct symbol *symbol) { + return symbol->name; +} + +int get_symbol_snapshot (struct symbol **symbol_p, unsigned long *value_p, struct section **section_p, struct frag **frag_p) { + + struct symbol *symbol = *symbol_p; + struct expr *expr = symbol_get_value_expression (symbol); + + if (!symbol_is_resolved (symbol) && expr->type != EXPR_TYPE_INVALID) { + + int resolved; + + if (symbol->resolving) { + return 1; + } + + symbol->resolving = 1; + resolved = resolve_expression (expr); + symbol->resolving = 0; + + if (resolved == 0) { + return 1; + } + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + case EXPR_TYPE_REGISTER: + + if (!symbol_uses_other_symbol (symbol)) { + break; + } + + /* fall through. 
*/ + + case EXPR_TYPE_SYMBOL: + + symbol = expr->add_symbol; + break; + + default: + + return 1; + + } + + } + + *value_p = expr->add_number; + *symbol_p = symbol; + + *section_p = symbol_get_section (symbol); + *frag_p = symbol_get_frag (symbol); + + if (*section_p == expr_section) { + + switch (expr->type) { + + case EXPR_TYPE_CONSTANT: + + *section_p = absolute_section; + break; + + case EXPR_TYPE_REGISTER: + + *section_p = reg_section; + break; + + default: + + break; + + } + + } + + return 0; + +} + +int symbol_force_reloc (struct symbol *symbol) { + return symbol->section == undefined_section; +} + +int symbol_is_external (struct symbol *symbol) { + return symbol->flags & SYMBOL_FLAG_EXTERNAL; +} + +int symbol_is_resolved (struct symbol *symbol) { + return symbol->resolved; +} + +int symbol_is_section_symbol (struct symbol *symbol) { + return symbol->flags & SYMBOL_FLAG_SECTION_SYMBOL; +} + +int symbol_is_undefined (struct symbol *symbol) { + return symbol->section == undefined_section; +} + +int symbol_uses_other_symbol (struct symbol *symbol) { + return (symbol->value.type == EXPR_TYPE_SYMBOL); +} + +int symbol_uses_reloc_symbol (struct symbol *symbol) { + return (symbol->value.type == EXPR_TYPE_SYMBOL && ((symbol_is_resolved (symbol) && symbol->value.op_symbol) || symbol_is_undefined (symbol))); +} + +unsigned long symbol_get_value (struct symbol *symbol) { + return symbol_resolve_value (symbol); +} + +unsigned long symbol_resolve_value (struct symbol *symbol) { + + struct section *final_section = symbol_get_section (symbol); + int resolved = 0; + + unsigned long final_value = 0; + + if (symbol->resolved) { + + if (symbol->value.type == EXPR_TYPE_CONSTANT) { + final_value = symbol->value.add_number; + } + + return final_value; + + } + + if (symbol->resolving) { + + report_at (get_filename (), get_line_number (), REPORT_ERROR, "symbol definition loop encountered at '%s'", symbol_get_name (symbol)); + + final_value = 0; + resolved = 1; + + } else { + + 
struct section *left_section, *right_section; + unsigned long left_value, right_value; + + int can_move_into_absolute_section; + + final_value = symbol->value.add_number; + symbol->resolving = 1; + + switch (symbol->value.type) { + + case EXPR_TYPE_ABSENT: + + final_value = 0; + /* fall through */ + + case EXPR_TYPE_CONSTANT: + + final_value += symbol->frag->address; + + if (final_section == expr_section) { + final_section = absolute_section; + } + + /* fall through */ + + case EXPR_TYPE_REGISTER: + + resolved = 1; + break; + + case EXPR_TYPE_SYMBOL: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + do_symbol: + + if (left_section == undefined_section || (finalize_symbols && final_section == expr_section && left_section != expr_section && left_section != absolute_section)) { + + if (finalize_symbols) { + + symbol->value.type = EXPR_TYPE_SYMBOL; + + symbol->value.op_symbol = symbol->value.add_symbol; + symbol->value.add_number = final_value; + + } + + final_value += symbol->frag->address + left_value; + final_section = left_section; + + resolved = symbol_is_resolved (symbol->value.add_symbol); + symbol->resolving = 0; + + goto exit_do_not_set_value; + + } else { + + final_value += symbol->frag->address + left_value; + + if (final_section == expr_section || final_section == undefined_section) { + final_section = left_section; + } + + } + + resolved = symbol_is_resolved (symbol->value.add_symbol); + break; + + case EXPR_TYPE_LOGICAL_NOT: + case EXPR_TYPE_BIT_NOT: + case EXPR_TYPE_UNARY_MINUS: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + if (symbol->value.type != EXPR_TYPE_LOGICAL_NOT && left_section != absolute_section && finalize_symbols) { + report_op_error (symbol, 0, symbol->value.type, symbol->value.add_symbol); + } + + if (final_section == expr_section || final_section == undefined_section) { 
+ final_section = absolute_section; + } + + switch (symbol->value.type) { + + case EXPR_TYPE_LOGICAL_NOT: + + left_value = !left_value; + break; + + case EXPR_TYPE_BIT_NOT: + + left_value = ~left_value; + break; + + case EXPR_TYPE_UNARY_MINUS: + + left_value = -left_value; + break; + + default: + + break; + + } + + resolved = symbol_is_resolved (symbol->value.add_symbol); + final_value += left_value + symbol->frag->address; + + break; + + case EXPR_TYPE_LOGICAL_OR: + case EXPR_TYPE_LOGICAL_AND: + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + case EXPR_TYPE_LESSER: + case EXPR_TYPE_LESSER_EQUAL: + case EXPR_TYPE_GREATER: + case EXPR_TYPE_GREATER_EQUAL: + case EXPR_TYPE_ADD: + case EXPR_TYPE_SUBTRACT: + case EXPR_TYPE_BIT_INCLUSIVE_OR: + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + case EXPR_TYPE_BIT_AND: + case EXPR_TYPE_MULTIPLY: + case EXPR_TYPE_DIVIDE: + case EXPR_TYPE_MODULUS: + case EXPR_TYPE_LEFT_SHIFT: + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = symbol_resolve_value (symbol->value.add_symbol); + left_section = symbol_get_section (symbol->value.add_symbol); + + right_value = symbol_resolve_value (symbol->value.op_symbol); + right_section = symbol_get_section (symbol->value.op_symbol); + + if (symbol->value.type == EXPR_TYPE_ADD) { + + if (right_section == absolute_section) { + + final_value += right_value; + goto do_symbol; + + } else if (left_section == absolute_section) { + + symbol->value.add_symbol = symbol->value.op_symbol; + final_value += left_value; + + left_value = right_value; + left_section = right_section; + + goto do_symbol; + + } + + } else if (symbol->value.type == EXPR_TYPE_SUBTRACT) { + + if (right_section == absolute_section) { + + final_value -= right_value; + goto do_symbol; + + } + + } + + can_move_into_absolute_section = 1; + + /** + * Equality and non-equality operations are allowed on everything. + * Subtraction and other comparison operators are allowed if both operands are in the same section. 
+ * For everything else, both operands must be absolute. + * Addition and subtraction of constants is handled above. + */ + if (!(left_section == absolute_section && right_section == absolute_section) + && !(symbol->value.type == EXPR_TYPE_EQUAL || symbol->value.type == EXPR_TYPE_NOT_EQUAL) + && !((symbol->value.type == EXPR_TYPE_SUBTRACT + || symbol->value.type == EXPR_TYPE_LESSER || symbol->value.type == EXPR_TYPE_LESSER_EQUAL + || symbol->value.type == EXPR_TYPE_GREATER || symbol->value.type == EXPR_TYPE_GREATER_EQUAL) + && left_section == right_section + && (left_section != undefined_section || symbol->value.add_symbol == symbol->value.op_symbol))) + { + + if (finalize_symbols) { + report_op_error (symbol, symbol->value.add_symbol, symbol->value.type, symbol->value.op_symbol); + } else { + can_move_into_absolute_section = 0; + } + + } + + if (can_move_into_absolute_section && (final_section == expr_section || final_section == undefined_section)) { + final_section = absolute_section; + } + + if ((symbol->value.type == EXPR_TYPE_DIVIDE || symbol->value.type == EXPR_TYPE_MODULUS) && right_value == 0) { + + const char *filename; + unsigned long line_number; + + if (expr_symbol_get_filename_and_line_number (symbol, &filename, &line_number) == 0) { + report_at (filename, line_number, REPORT_ERROR, "division by zero"); + } else { + report_at (0, 0, REPORT_ERROR, "division by zero when setting '%s'", symbol_get_name (symbol)); + } + + right_value = 1; + + } + + switch (symbol->value.type) { + + case EXPR_TYPE_LOGICAL_OR: + + left_value = left_value || right_value; + break; + + case EXPR_TYPE_LOGICAL_AND: + + left_value = left_value && right_value; + break; + + case EXPR_TYPE_EQUAL: + case EXPR_TYPE_NOT_EQUAL: + + left_value = ((left_value == right_value && left_section == right_section && (left_section != undefined_section || symbol->value.add_symbol == symbol->value.op_symbol)) ? 
~(signed long) 0 : 0); + + if (symbol->value.type == EXPR_TYPE_NOT_EQUAL) { + left_value = ~left_value; + } + + break; + + case EXPR_TYPE_LESSER: + + left_value = left_value < right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_LESSER_EQUAL: + + left_value = left_value <= right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER: + + left_value = left_value > right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_GREATER_EQUAL: + + left_value = left_value >= right_value ? ~(signed long) 0 : 0; + break; + + case EXPR_TYPE_ADD: + + left_value += right_value; + break; + + case EXPR_TYPE_SUBTRACT: + + left_value -= right_value; + break; + + case EXPR_TYPE_BIT_INCLUSIVE_OR: + + left_value |= right_value; + break; + + case EXPR_TYPE_BIT_EXCLUSIVE_OR: + + left_value ^= right_value; + break; + + case EXPR_TYPE_BIT_AND: + + left_value &= right_value; + break; + + case EXPR_TYPE_MULTIPLY: + + left_value *= right_value; + break; + + case EXPR_TYPE_DIVIDE: + + left_value /= right_value; + break; + + case EXPR_TYPE_MODULUS: + + left_value %= right_value; + break; + + case EXPR_TYPE_LEFT_SHIFT: + + left_value = ~(unsigned long) left_value << ~(unsigned long) right_value; + break; + + case EXPR_TYPE_RIGHT_SHIFT: + + left_value = ~(unsigned long) left_value >> ~(unsigned long) right_value; + break; + + default: + + break; + + } + + final_value += symbol->frag->address + left_value; + + if (final_section == expr_section || final_section == undefined_section) { + + if (left_section == undefined_section || right_section == undefined_section) { + final_section = undefined_section; + } else if (left_section == absolute_section) { + final_section = right_section; + } else { + final_section = left_section; + } + + } + + resolved = (symbol_is_resolved (symbol->value.add_symbol) && symbol_is_resolved (symbol->value.op_symbol)); + break; + + default: + + report_at (__FILE__, __LINE__, REPORT_INTERNAL_ERROR, "symbol_resolve_value invalid case %i", 
symbol->value.type);
+                exit (EXIT_FAILURE);
+
+        }
+
+        symbol->resolving = 0;
+
+    }
+    /* When finalizing, replace the symbol's expression with the computed constant. */
+    if (finalize_symbols) {
+        symbol_set_value (symbol, final_value);
+    }
+
+exit_do_not_set_value:
+    /* Reached by fall-through or by a jump to the label, which bypasses the symbol_set_value call above. */
+    if (finalize_symbols) {
+        /* 'resolved' is only ever latched on; a zero result leaves the stored flag untouched. */
+        if (resolved) {
+            symbol->resolved = resolved;
+        }
+
+    }
+    /* The section is recorded on every call, whether or not symbols are being finalized. */
+    symbol_set_section (symbol, final_section);
+    return final_value;
+
+}
+/* Store 'symbol' through pointer_to_pointer_to_next_symbol, then point that at symbol->next. */
+void symbol_add_to_chain (struct symbol *symbol) {
+
+    *pointer_to_pointer_to_next_symbol = symbol;
+    pointer_to_pointer_to_next_symbol = &symbol->next;
+
+}
+/* Set the frag recorded in 'symbol'. */
+void symbol_set_frag (struct symbol *symbol, struct frag *frag) {
+    symbol->frag = frag;
+}
+/* Set SYMBOL_FLAG_EXTERNAL in symbol->flags; other flag bits are left as they are. */
+void symbol_set_external (struct symbol *symbol) {
+    symbol->flags |= SYMBOL_FLAG_EXTERNAL;
+}
+/* Set the section recorded in 'symbol'. */
+void symbol_set_section (struct symbol *symbol, struct section *section) {
+    symbol->section = section;
+}
+/* Make the symbol's value the constant 'value'. */
+void symbol_set_value (struct symbol *symbol, unsigned long value) {
+    /* Only the type and add_number members are written; the rest of symbol->value is not touched. */
+    symbol->value.type = EXPR_TYPE_CONSTANT;
+    symbol->value.add_number = value;
+
+}
+/* Copy *expr (by struct assignment) into the symbol's value. */
+void symbol_set_value_expression (struct symbol *symbol, struct expr *expr) {
+    symbol->value = *expr;
+}
diff --git a/symbol.h b/symbol.h
new file mode 100644
index 0000000..3c5760e
--- /dev/null
+++ b/symbol.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+ * @file symbol.h
+ *****************************************************************************/
+#ifndef _SYMBOL_H
+#define _SYMBOL_H
+
+#include "expr.h"
+/* Bits stored in the 'flags' member of struct symbol. */
+#define SYMBOL_FLAG_EXTERNAL 0x01
+#define SYMBOL_FLAG_SECTION_SYMBOL 0x02
+/* A symbol: its name, the section and frag it is attached to, its value and state flags. */
+struct symbol {
+
+    char *name;
+    int scope;                      /* presumably one of SYMBOL_SCOPE_* -- TODO confirm */
+
+    struct section *section;        /* written by symbol_set_section */
+    struct frag *frag;              /* written by symbol_set_frag */
+
+    struct expr value;              /* written by symbol_set_value / symbol_set_value_expression */
+    int flags;                      /* SYMBOL_FLAG_* bits */
+
+    int resolved, resolving;        /* resolution state kept by symbol_resolve_value */
+    struct symbol *next;            /* link used by symbol_add_to_chain */
+
+};
+
+#define FAKE_LABEL_NAME "FAKE_SASM_SYMBOL"
+extern struct symbol *symbols;
+extern int finalize_symbols;        /* non-zero makes symbol_resolve_value store its results */
+/* Scope values; presumably stored in 'scope' and passed to symbol_find_or_make -- TODO confirm. */
+#define SYMBOL_SCOPE_LOCAL 0x01
+#define SYMBOL_SCOPE_GLOBAL 0x02
+#define SYMBOL_SCOPE_EXTERN 0x03
+
+struct expr *symbol_get_value_expression (struct symbol *symbol);
+
+char
*symbol_get_name (struct symbol *symbol);
+int get_symbol_snapshot (struct symbol **symbol_p, unsigned long *value_p, struct section **section_p, struct frag **frag_p);
+
+struct frag *symbol_get_frag (struct symbol *symbol);
+struct section *symbol_get_section (struct symbol *symbol);
+
+struct symbol *symbol_create (const char *name, struct section *section, unsigned long value, struct frag *frag);
+struct symbol *symbol_find (const char *name);
+struct symbol *symbol_find_or_make (char *name, int scope);
+struct symbol *symbol_label (char *start, char *caret, char *name);
+struct symbol *symbol_make (const char *name);
+struct symbol *symbol_temp_new_now (void);
+
+int symbol_force_reloc (struct symbol *symbol);
+int symbol_is_external (struct symbol *symbol);
+int symbol_is_resolved (struct symbol *symbol);
+int symbol_is_section_symbol (struct symbol *symbol);
+int symbol_is_undefined (struct symbol *symbol);
+int symbol_uses_other_symbol (struct symbol *symbol);
+int symbol_uses_reloc_symbol (struct symbol *symbol);
+
+unsigned long symbol_get_value (struct symbol *symbol);
+unsigned long symbol_resolve_value (struct symbol *symbol);
+/* Chain insertion and field setters. */
+void symbol_add_to_chain (struct symbol *symbol);
+void symbol_set_frag (struct symbol *symbol, struct frag *frag);
+void symbol_set_external (struct symbol *symbol);
+void symbol_set_section (struct symbol *symbol, struct section *section);
+void symbol_set_value (struct symbol *symbol, unsigned long value);
+void symbol_set_value_expression (struct symbol *symbol, struct expr *expr);
+
+#endif /* _SYMBOL_H */
diff --git a/vector.c b/vector.c
new file mode 100644
index 0000000..3984039
--- /dev/null
+++ b/vector.c
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * @file vector.c
+ *****************************************************************************/
+#include <stddef.h>
+#include <stdlib.h>
+/* <stddef.h> supplies NULL, which vec_pop returns. */
+#include "vector.h"
+/* Allocator defined elsewhere in the project; vec_adjust grows vec->data with it. */
+extern void *xrealloc (void *ptr, unsigned int size);
+
+int
vec_adjust (struct vector *vec, int length) {
+    /* Make index 'length' a valid slot of vec->data, growing the buffer if needed: 0 on success, -1 if too large. */
+    int capacity = vec->capacity;
+
+    /* Double until the slot fits (one doubling may be too little), refusing sizes xrealloc's unsigned int cannot hold. */
+    while (capacity <= length) {
+        if (capacity > (int) ((unsigned int) -1 / sizeof (*(vec->data)) / 2)) {
+            return -1;
+        }
+        capacity = (capacity == 0) ? 16 : capacity << 1;
+    }
+    if (capacity != vec->capacity) {
+        vec->data = xrealloc (vec->data, sizeof (*(vec->data)) * capacity);
+        vec->capacity = capacity;
+    }
+    return 0;
+}
+
+void *vec_pop (struct vector *vec) {
+    /* Remove and return the last element; NULL when 'vec' is NULL or holds no elements. */
+    if (vec == NULL) {
+        return NULL;
+    }
+
+    if (vec->length <= 0) {
+        return NULL;
+    }
+
+    return vec->data[--vec->length];
+
+}
+
+int vec_push (struct vector *vec, void *elem) {
+    /* Append 'elem' after making room for it; returns 0, or vec_adjust's non-zero result on failure. */
+    int ret;
+
+    if ((ret = vec_adjust (vec, vec->length)) != 0) {
+        return ret;
+    }
+
+    vec->data[vec->length++] = elem;
+    return 0;
+
+}
diff --git a/vector.h b/vector.h
new file mode 100644
index 0000000..29d957a
--- /dev/null
+++ b/vector.h
@@ -0,0 +1,19 @@
+/******************************************************************************
+ * @file vector.h
+ *****************************************************************************/
+#ifndef _VECTOR_H
+#define _VECTOR_H
+
+struct vector {
+
+    void **data;                /* element pointers; 'capacity' slots are allocated */
+    int capacity, length;       /* allocated slots, slots in use */
+
+};
+
+int vec_adjust (struct vector *vec, int length);
+int vec_push (struct vector *vec, void *elem);
+
+void *vec_pop (struct vector *vec);
+
+#endif /* _VECTOR_H */
-- 
2.34.1