From c21cffa5168680ea77fc610d0f9a811727581427 Mon Sep 17 00:00:00 2001 From: Robert Pengelly Date: Thu, 27 Feb 2025 21:15:48 +0000 Subject: [PATCH] Added elks executable generation and bug fixes --- aout.c | 21 +++--- elks.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- elks.h | 4 ++ ld.c | 4 +- lib.c | 12 ++-- link.c | 182 +++++++++++++++++++++++++++++++++++++++------- reloc.h | 1 + symbol.h | 2 +- 8 files changed, 385 insertions(+), 57 deletions(-) diff --git a/aout.c b/aout.c index 9308537..3848688 100644 --- a/aout.c +++ b/aout.c @@ -66,18 +66,12 @@ static unsigned char *write_relocs_for_section (unsigned char *pos, struct secti symbol = part->reloc_arr[i].symbol; if (symbol_is_undefined (symbol)) { - symbol = symbol_find (symbol->name); - - if (strcmp (symbol->name, "__etext") == 0 || strcmp (symbol->name, "__edata") == 0 || strcmp (symbol->name, "__end") == 0) { - continue; - } - } integer_to_array (part->rva - part->section->rva + part->reloc_arr[i].offset, rel.r_address, 4); - if (strcmp (symbol->part->section->name, ".text") == 0) { + if (!symbol->part || strcmp (symbol->part->section->name, ".text") == 0) { r_symbolnum = N_TEXT; } else if (strcmp (symbol->part->section->name, ".data") == 0) { r_symbolnum = N_DATA; @@ -87,6 +81,13 @@ static unsigned char *write_relocs_for_section (unsigned char *pos, struct secti r_symbolnum = N_TEXT; } + if ((part->reloc_arr[i].symbolnum >> 27) & 1) { + + /*report_at (part->of->filename, 0, REPORT_INTERNAL_ERROR, "symbolnum: %lx", part->reloc_arr[i].symbolnum);*/ + r_symbolnum |= (1LU << 27); + + } + integer_to_array (r_symbolnum | (size_log2 << 25), rel.r_symbolnum, 4); memcpy (pos, &rel, sizeof (rel)); @@ -135,13 +136,13 @@ void aout_write (const char *filename) { } else { + integer_to_array (0, exec.a_data, 4); + if (bss_section) { - integer_to_array (bss_section->rva - data_section->rva, exec.a_text, 4); + integer_to_array (bss_section->rva, exec.a_text, 4); } else { integer_to_array (text_section ? text_section->total_size : 0, exec.a_text, 4); } - - integer_to_array (0, exec.a_data, 4); } diff --git a/elks.c b/elks.c index 2c17f53..7d924bf 100644 --- a/elks.c +++ b/elks.c @@ -1,6 +1,8 @@ /****************************************************************************** * @file elks.c *****************************************************************************/ +#include +#include #include #include "elks.h" @@ -11,7 +13,7 @@ #include "section.h" #include "symbol.h" -static void translate_relocation (const char *filename, struct reloc_entry *reloc, struct elks_relocation_info *input_reloc, struct section_part *part, struct elks_exec *exec_p) { +static void translate_relocation (struct reloc_entry *reloc, struct elks_relocation_info *input_reloc, struct section_part *part, struct elks_exec *exec_p) { unsigned long r_symbolnum = array_to_integer (input_reloc->r_symbolnum, 4); @@ -19,17 +21,15 @@ static void translate_relocation (const char *filename, struct reloc_entry *relo long symbolnum = (r_symbolnum & 0x7ffffff); if ((r_symbolnum >> 31) & 1) { /* ext */ - reloc->symbol = part->of->symbol_arr + symbolnum; - } else { - if (state->format == LD_FORMAT_BIN || state->format == LD_FORMAT_COM) { - - if ((((r_symbolnum & (3 << 29)) >> 28) & 0xff) != N_ABS) { - report_at (program_name, 0, REPORT_ERROR, "%s: segment relocation at %04x:%04x", filename, r_address / 0xffff, r_address % 0xffff); - } + reloc->symbol = part->of->symbol_arr + symbolnum; + if (xstrcasecmp (reloc->symbol->name, "DGROUP") == 0) { + r_symbolnum &= ~(1LU << 31); } - + + } else { + if (symbolnum == N_TEXT) { reloc->symbol = part->of->symbol_arr + part->of->symbol_cnt - 3; } else if (symbolnum == N_DATA) { @@ -51,6 +51,7 @@ static void translate_relocation (const char *filename, struct reloc_entry *relo } + reloc->symbolnum = r_symbolnum; reloc->offset = r_address; switch (1U << ((r_symbolnum >> 29) & 3)) { @@ -246,6 +247,7 @@ void read_elks_object (const char *filename, unsigned char *data, unsigned long symbol->value = array_to_integer (elks_nlist->n_value, 4); symbol->size = 0; + symbol->n_type = elks_nlist->n_type; if ((elks_nlist->n_type & N_TYPE) == N_UNDF || (elks_nlist->n_type & N_TYPE) == N_COMM) { @@ -340,7 +342,7 @@ void read_elks_object (const char *filename, unsigned char *data, unsigned long for (i = 0; i < part->reloc_cnt; i++) { reloc_info = (struct elks_relocation_info *) (pos + (sizeof (*reloc_info) * i)); - translate_relocation (filename, part->reloc_arr + i, reloc_info, part, elks_exec); + translate_relocation (part->reloc_arr + i, reloc_info, part, elks_exec); } @@ -353,8 +355,200 @@ void read_elks_object (const char *filename, unsigned char *data, unsigned long for (i = 0; i < part->reloc_cnt; i++) { reloc_info = (struct elks_relocation_info *) (pos + (sizeof (*reloc_info) * i)); - translate_relocation (filename, part->reloc_arr + i, reloc_info, part, elks_exec); + translate_relocation (part->reloc_arr + i, reloc_info, part, elks_exec); + + } + +} + + +static unsigned long section_get_num_relocs (struct section *section) { + + unsigned long num_relocs = 0, i; + + struct section_part *part; + struct reloc_howto *howto; + + for (part = section->first_part; part; part = part->next) { + + for (i = 0; i < part->reloc_cnt; i++) { + + howto = part->reloc_arr[i].howto; + + if (howto == &reloc_howtos[RELOC_TYPE_64] || howto == &reloc_howtos[RELOC_TYPE_32] || howto == &reloc_howtos[RELOC_TYPE_16] || howto == &reloc_howtos[RELOC_TYPE_8]) { + num_relocs++; + } + + } + + } + + return num_relocs; + +} + +static unsigned char *write_relocs_for_section (unsigned char *pos, struct section *section) { + + struct section_part *part; + struct elks_relocation_info rel; + + struct symbol *symbol; + unsigned long size_log2, i, r_symbolnum; + + for (part = section->first_part; part; part = part->next) { + + for (i = 0; i < part->reloc_cnt; i++) { + + memset (&rel, 0, sizeof (rel)); + + if (part->reloc_arr[i].howto == &reloc_howtos[RELOC_TYPE_64]) { + size_log2 = 3; + } else if (part->reloc_arr[i].howto == &reloc_howtos[RELOC_TYPE_32]) { + size_log2 = 2; + } else if (part->reloc_arr[i].howto == &reloc_howtos[RELOC_TYPE_16]) { + size_log2 = 1; + } else if (part->reloc_arr[i].howto == &reloc_howtos[RELOC_TYPE_8]) { + size_log2 = 0; + } else { + continue; + } + + symbol = part->reloc_arr[i].symbol; + + if (symbol_is_undefined (symbol)) { + symbol = symbol_find (symbol->name); + } + + integer_to_array (part->rva - part->section->rva + part->reloc_arr[i].offset, rel.r_address, 4); + + if (!symbol->part || strcmp (symbol->part->section->name, ".text") == 0) { + r_symbolnum = N_TEXT; + } else if (strcmp (symbol->part->section->name, ".data") == 0) { + r_symbolnum = N_DATA; + } else if (strcmp (symbol->part->section->name, ".bss") == 0) { + r_symbolnum = N_BSS; + } else { + r_symbolnum = N_TEXT; + } + + if ((part->reloc_arr[i].symbolnum >> 27) & 1) { + r_symbolnum |= (1LU << 27); + } + + integer_to_array (r_symbolnum | (size_log2 << 25), rel.r_symbolnum, 4); + + memcpy (pos, &rel, sizeof (rel)); + pos += sizeof (rel); + + } + + } + + return pos; + +} + +void elks_write (const char *filename) { + + FILE *fp; + + unsigned char *data, *pos; + unsigned long data_size; + + struct section *text_section, *data_section, *bss_section; + struct elks_exec exec = { 0 }; + + if (!(fp = fopen (filename, "wb"))) { + + report_at (program_name, 0, REPORT_ERROR, "failed to open '%s' for writing", filename); + return; + + } + + text_section = section_find (".text"); + data_section = section_find (".data"); + bss_section = section_find (".bss"); + + integer_to_array (ELKS_MAGIC, exec.a_magic, 4); + + exec.a_flags = 0x10; + exec.a_cpu = (state->format == LD_FORMAT_I386_ELKS) ? 0x10 : 0x04; + exec.a_hdrlen = sizeof (exec); + + if (data_section) { + + integer_to_array (data_section->rva, exec.a_text, 4); + + if (bss_section) { + integer_to_array (bss_section->rva - data_section->rva, exec.a_data, 4); + } else { + integer_to_array (data_section->total_size, exec.a_data, 4); + } + + } else { + + integer_to_array (0, exec.a_data, 4); + + if (bss_section) { + integer_to_array (bss_section->rva, exec.a_text, 4); + } else { + integer_to_array (text_section ? text_section->total_size : 0, exec.a_text, 4); + } + + } + + integer_to_array (bss_section ? bss_section->total_size : 0, exec.a_bss, 4); + integer_to_array (state->entry_point, exec.a_entry, 4); + + integer_to_array (array_to_integer (exec.a_text, 4) + array_to_integer (exec.a_data, 4), exec.a_total, 4); + + if (text_section) { + integer_to_array (section_get_num_relocs (text_section) * sizeof (struct elks_relocation_info), exec.a_trsize, 4); + } + + if (data_section) { + integer_to_array (section_get_num_relocs (data_section) * sizeof (struct elks_relocation_info), exec.a_drsize, 4); + } + + data_size = sizeof (exec); + + data_size += (array_to_integer (exec.a_text, 4) + array_to_integer (exec.a_data, 4)); + data_size += (array_to_integer (exec.a_trsize, 4) + array_to_integer (exec.a_drsize, 4)); + + data_size += 4; + + data = xmalloc (data_size); + memcpy (data, &exec, sizeof (exec)); + pos = data + sizeof (exec); + + if (text_section) { + section_write (text_section, pos); + } + + pos += array_to_integer (exec.a_text, 4); + + if (data_section) { + section_write (data_section, pos); + } + + pos += array_to_integer (exec.a_data, 4); + + if (text_section) { + pos = write_relocs_for_section (pos, text_section); } + + if (data_section) { + pos = write_relocs_for_section (pos, data_section); + } + + integer_to_array (4, pos, 4); + + if (fwrite (data, data_size, 1, fp) != 1) { + report_at (program_name, 0, REPORT_ERROR, "failed to write data to '%s'", filename); + } + + free (data); + fclose (fp); } diff --git a/elks.h b/elks.h index 2a592d3..6f9b136 100644 --- a/elks.h +++ b/elks.h @@ -6,6 +6,7 @@ struct elks_exec { + /* offset 0 */ unsigned char a_magic[2]; unsigned char a_flags; unsigned char a_cpu; @@ -13,6 +14,7 @@ struct elks_exec { unsigned char a_unused; unsigned char a_version[2]; + /* offset 8 */ unsigned char a_text[4]; unsigned char a_data[4]; unsigned char a_bss[4]; @@ -20,6 +22,7 @@ struct elks_exec { unsigned char a_total[4]; unsigned char a_syms[4]; + /* offset 32 */ unsigned char a_trsize[4]; unsigned char a_drsize[4]; unsigned char a_trbase[4]; @@ -58,6 +61,7 @@ struct elks_nlist { #define N_TYPE 0x1e #define ELKS_MAGIC 0403 +void elks_write (const char *filename); void read_elks_object (const char *filename, unsigned char *data, unsigned long data_size); #endif /* _ELKS_H */ diff --git a/ld.c b/ld.c index 9d0df7b..2f95229 100644 --- a/ld.c +++ b/ld.c @@ -196,9 +196,9 @@ int main (int argc, char **argv) { } else if (state->format == LD_FORMAT_I386_AOUT) { aout_write (state->output_filename); - }/* else if (state->format == LD_FORMAT_I386_ELKS || state->format == LD_FORMAT_IA16_ELKS) { + } else if (state->format == LD_FORMAT_I386_ELKS || state->format == LD_FORMAT_IA16_ELKS) { elks_write (state->output_filename); - }*/ else if (state->format == LD_FORMAT_I386_PE) { + } else if (state->format == LD_FORMAT_I386_PE) { pe_after_link (); pe_write (state->output_filename); diff --git a/lib.c b/lib.c index 4fd0116..0c937d7 100644 --- a/lib.c +++ b/lib.c @@ -96,8 +96,8 @@ static void print_usage (void) { fprintf (stderr, " --help Print option help.\n"); fprintf (stderr, " --oformat FORMAT Specify the format of output file (default msdos)\n"); fprintf (stderr, " Supported formats are:\n"); - /*fprintf (stderr, " a.out-i386, elks-ia16, elks-i386,\n");*/ - fprintf (stderr, " a.out-i386, pe-i386, binary, msdos\n"); + fprintf (stderr, " a.out-i386, elks-ia16, elks-i386, pe-i386\n"); + fprintf (stderr, " binary, msdos\n"); fprintf (stderr, " --image-base
Set base address of the executable.\n"); fprintf (stderr, "\n"); @@ -159,19 +159,19 @@ static void use_option (const char *cmd_arg, int idx, const char *optarg) { } - /*if (xstrcasecmp (optarg, "elks-ia16") == 0) { + if (xstrcasecmp (optarg, "elks-ia16") == 0) { state->format = LD_FORMAT_IA16_ELKS; break; - }*/ + } - /*if (xstrcasecmp (optarg, "elks-i386") == 0) { + if (xstrcasecmp (optarg, "elks-i386") == 0) { state->format = LD_FORMAT_I386_ELKS; break; - }*/ + } if (xstrcasecmp (optarg, "a.out-i386") == 0) { diff --git a/link.c b/link.c index 74ba25d..7b78854 100644 --- a/link.c +++ b/link.c @@ -6,6 +6,7 @@ #include #include +#include "elks.h" #include "ld.h" #include "lib.h" #include "pe.h" @@ -32,6 +33,19 @@ struct reloc_howto reloc_howtos[RELOC_TYPE_END] = { }; +static unsigned long dgroup_start = 0; +static int has_dgroup = 0; + +static unsigned long align_section_if_needed (unsigned long value) { + + if (state->format == LD_FORMAT_I386_PE) { + return pe_align_to_file_alignment (value); + } + + return value; + +} + static void output_symbols (void) { struct section *section = section_find_or_make (".data"); @@ -277,41 +291,44 @@ static void calculate_section_sizes_and_rvas (void) { static void reloc_generic (struct section_part *part, struct reloc_entry *rel, struct symbol *symbol) { - unsigned long result = 0; + unsigned char opcode = (part->content + rel->offset - 1)[0]; + unsigned int size = rel->howto->size; - switch (rel->howto->size) { + unsigned long result = 0, offset = rel->offset; + + switch (size) { case 8: { - result = array_to_integer (part->content + rel->offset, 8); + result = array_to_integer (part->content + offset, 8); break; } case 4: { - result = array_to_integer (part->content + rel->offset, 4); + result = array_to_integer (part->content + offset, 4); break; } case 3: { - result = array_to_integer (part->content + rel->offset, 3); + result = array_to_integer (part->content + offset, 3); break; } case 2: { - result = array_to_integer (part->content + rel->offset, 2); + result = array_to_integer (part->content + offset, 2); break; } case 1: { - result = array_to_integer (part->content + rel->offset, 1); + result = array_to_integer (part->content + offset, 1); break; } @@ -333,57 +350,155 @@ static void reloc_generic (struct section_part *part, struct reloc_entry *rel, s if (rel->howto->pc_rel) { - result -= (part->rva + rel->offset); - result -= rel->howto->size; + result -= (part->rva + offset); + result -= size; } } else { + result += symbol_get_value_with_base (symbol); + + if (state->format == LD_FORMAT_BIN || state->format == LD_FORMAT_COM) { + + if (!((rel->symbolnum >> 31) & 1)/* || (symbol->n_type & N_TYPE) == N_BSS || (symbol->n_type & N_TYPE) == N_DATA || (symbol->n_type & N_TYPE) == N_TEXT*/) { + + /*report_at (__FILE__, __LINE__, REPORT_FATAL_ERROR, "symbol: %s, %lx", symbol->name, ((rel->symbolnum) >> 28));*/ + + if (((rel->symbolnum >> 27) & 1) || (((rel->symbolnum) >> 28) & 0xff) != N_ABS) { + report_at (program_name, 0, REPORT_ERROR, "%s:(%s+%#lu): segment relocation", part->of->filename, part->section->name, offset); + } + + } + + } + + if (xstrcasecmp (symbol->name, "__etext") == 0 || xstrcasecmp (symbol->name, "__edata") == 0 || xstrcasecmp (symbol->name, "__end") == 0) { + + if (rel->howto == &reloc_howtos[RELOC_TYPE_64]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC64]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_32]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC32]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_16]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC16]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_8]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC8]; + } + + } + } - if ((unsigned long) rel->howto->size < sizeof (result)) { + if ((unsigned long) size < sizeof (result)) { - unsigned long mask = (((unsigned long) 1) << (CHAR_BIT * rel->howto->size)) - 1; + unsigned long mask = (((unsigned long) 1) << (CHAR_BIT * size)) - 1; result &= mask; } result >>= rel->howto->final_right_shift; - switch (rel->howto->size) { + if (!rel->howto->pc_rel && !rel->howto->no_base) { + + if (has_dgroup) { + + if (xstrcasecmp (symbol->name, "__etext") != 0 && xstrcasecmp (symbol->name, "__edata") != 0 && xstrcasecmp (symbol->name, "__end") != 0) { + + if (result >= dgroup_start) { + result -= (dgroup_start & 0xfffffff0); + } + + } + + } + + } + + if (opcode == 0xff) { + + if (result >= 65535) { + + report_at (program_name, 0, REPORT_ERROR, "%s:(%s+%#lu): call exceeds 65535 bytes", part->of->filename, part->section->name, offset); + + if (rel->howto == &reloc_howtos[RELOC_TYPE_PC64]) { + rel->howto = &reloc_howtos[RELOC_TYPE_64]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_PC32]) { + rel->howto = &reloc_howtos[RELOC_TYPE_32]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_PC16]) { + rel->howto = &reloc_howtos[RELOC_TYPE_16]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_PC8]) { + rel->howto = &reloc_howtos[RELOC_TYPE_8]; + } + + result = (((unsigned long) result / 16) << 16) | (unsigned long) result % 16; + + } else { + + unsigned char *p = part->content + offset; + + if (rel->howto == &reloc_howtos[RELOC_TYPE_64]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC64]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_32]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC32]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_16]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC16]; + } else if (rel->howto == &reloc_howtos[RELOC_TYPE_8]) { + rel->howto = &reloc_howtos[RELOC_TYPE_PC8]; + } + + *(p - 1) = 0x0E; + *(p + 0) = 0xE8; + *(p + 3) = 0x90; + + result -= part->rva + rel->offset; + + while (size > 2) { + + result--; + size--; + + } + + offset++; + result--; + + } + + } + + switch (size) { case 8: { - integer_to_array (result, part->content + rel->offset, 8); + integer_to_array (result, part->content + offset, 8); break; } case 4: { - integer_to_array (result, part->content + rel->offset, 4); + integer_to_array (result, part->content + offset, 4); break; } case 3: { - integer_to_array (result, part->content + rel->offset, 3); + integer_to_array (result, part->content + offset, 3); break; } case 2: { - integer_to_array (result, part->content + rel->offset, 2); + integer_to_array (result, part->content + offset, 2); break; } case 1: { - integer_to_array (result, part->content + rel->offset, 1); + integer_to_array (result, part->content + offset, 1); break; } @@ -521,16 +636,6 @@ static void calculate_entry_point (void) { } -static unsigned long align_section_if_needed (unsigned long value) { - - if (state->format == LD_FORMAT_I386_PE) { - return pe_align_to_file_alignment (value); - } - - return value; - -} - void link (void) { unsigned long value = 0, offset = 0; @@ -603,6 +708,29 @@ void link (void) { value = 0; + if ((symbol = symbol_find ("DGROUP")) && symbol_is_undefined (symbol)) { + + if ((section = section_find (".text"))) { + value = align_section_if_needed (section->total_size); + } + + dgroup_start = value; + has_dgroup = 1; + + of = object_file_make (FAKE_LD_FILENAME, 1); + + symbol = of->symbol_arr; + symbol->name = xstrdup ("DGROUP"); + + symbol->section_number = ABSOLUTE_SECTION_NUMBER; + symbol->value = value / 16; + + symbol_record_external_symbol (symbol); + + } + + value = 0; + if ((symbol = symbol_find ("__etext")) && symbol_is_undefined (symbol)) { if ((section = section_find (".text"))) { diff --git a/reloc.h b/reloc.h index 118719e..8aa26e9 100644 --- a/reloc.h +++ b/reloc.h @@ -49,6 +49,7 @@ struct reloc_entry { unsigned long offset; unsigned long addend; + unsigned long symbolnum; struct reloc_howto *howto; }; diff --git a/symbol.h b/symbol.h index 82e8c98..a7a3195 100644 --- a/symbol.h +++ b/symbol.h @@ -9,7 +9,7 @@ struct symbol { char *name; - int flags; + int flags, n_type; unsigned long value; unsigned long size; -- 2.34.1