From: Robert Pengelly Date: Fri, 26 Jun 2026 16:48:30 +0000 (+0100) Subject: 64-bit fixes X-Git-Url: https://git.candlhat.org/?a=commitdiff_plain;h=91ec845923d62310ff16557f665b2017a4df3dd9;p=scc.git 64-bit fixes --- diff --git a/amd64.c b/amd64.c index a2c7706..2c84230 100644 --- a/amd64.c +++ b/amd64.c @@ -27330,9 +27330,11 @@ static void emit_load_assignment_rhs_expression_to_pair (const char *lo, const c */ if (op == TOK_LSH || op == TOK_RSH || op == TOK_LSHEQ || op == TOK_RSHEQ) { + /* + * AMD64 64-bit scalar expressions live in RAX, not RDX:RAX. + * Keep only the real left operand while the shift count is parsed. + */ emit_push_reg_now ("rax"); - emit_push_reg_now ("rdx"); - emit_load_assignment_binary_expression_to_reg ("rbx"); if (state->syntax & ASM_SYNTAX_INTEL) { @@ -27341,28 +27343,18 @@ static void emit_load_assignment_rhs_expression_to_pair (const char *lo, const c fprintf (state->ofp, " xorq %%rcx, %%rcx\n"); } - emit_pop_reg_now ("rdx"); emit_pop_reg_now ("rax"); } else { /* - * The generic 64-bit RHS loader uses EAX:EDX as scratch even - * when asked to leave the final value in EBX:ECX. Preserve the - * left operand around RHS evaluation; otherwise expressions such - * as: - * - * result &= (((address_type)1) << n) - 1 - * - * end up applying the operator to the RHS twice, because the - * computed mask clobbers the original result in EAX:EDX. + * Native AMD64 64-bit binary ops use RAX and RBX. Do not route + * the RHS through RAX:RDX and then copy RDX into RCX; that is the + * old i386 pair model leaking into the AMD64 backend. */ emit_push_reg_now ("rax"); - emit_push_reg_now ("rdx"); emit_load_assignment_rhs_to_pair ("rbx", "rcx"); - - emit_pop_reg_now ("rdx"); emit_pop_reg_now ("rax"); } @@ -30774,26 +30766,20 @@ static int parse_identifier_assignment_statement (void) { } /* - * Compound assignments need the complete RHS expression. - * Using emit_load_assignment_rhs_to_pair() only consumes one - * primary operand, so e.g. + * AMD64 uint64_t is a native qword, not an i386-style + * EDX:EAX pair. Preserve only the LHS qword in RAX and load + * the complete RHS directly into the native RHS register RBX. * - * final_value += symbol->frag->address + left_value; + * Using RAX:RDX as a temporary RHS pair created stale RDX + * saves/restores and useless MOV RCX,RDX instructions in code + * such as: * - * leaves the second + operand for the statement parser and - * reports "expected ;". Evaluate the full RHS in rax:rdx, - * copy it to the RHS pair rbx:rcx, then restore the original - * LHS value before applying the compound operator. + * h ^= *p++; + * h *= UINT64_C (...); */ emit_push_reg_now ("rax"); - emit_push_reg_now ("rdx"); - - emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name)); - emit_mov_reg_to_reg_now ("rbx", "rax"); - emit_mov_reg_to_reg_now ("rcx", "rdx"); - - emit_pop_reg_now ("rdx"); + emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name)); emit_pop_reg_now ("rax"); emit_preserve_assignment64_regs (op); @@ -33548,26 +33534,20 @@ static void parse_for_header_expression_until (enum token_kind end_token) { } /* - * Compound assignments need the complete RHS expression. - * Using emit_load_assignment_rhs_to_pair() only consumes one - * primary operand, so e.g. + * AMD64 uint64_t is a native qword, not an i386-style + * EDX:EAX pair. Preserve only the LHS qword in RAX and load + * the complete RHS directly into the native RHS register RBX. * - * final_value += symbol->frag->address + left_value; + * Using RAX:RDX as a temporary RHS pair created stale RDX + * saves/restores and useless MOV RCX,RDX instructions in code + * such as: * - * leaves the second + operand for the statement parser and - * reports "expected ;". Evaluate the full RHS in rax:rdx, - * copy it to the RHS pair rbx:rcx, then restore the original - * LHS value before applying the compound operator. + * h ^= *p++; + * h *= UINT64_C (...); */ emit_push_reg_now ("rax"); - emit_push_reg_now ("rdx"); - - emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name)); - emit_mov_reg_to_reg_now ("rbx", "rax"); - emit_mov_reg_to_reg_now ("rcx", "rdx"); - - emit_pop_reg_now ("rdx"); + emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name)); emit_pop_reg_now ("rax"); emit_preserve_assignment64_regs (op); diff --git a/i386.c b/i386.c index cfa5531..c6854f5 100644 --- a/i386.c +++ b/i386.c @@ -12514,10 +12514,17 @@ static void emit_assignment_binary_op64 (enum token_kind op, int is_unsigned) { fprintf (state->ofp, " mov esi, eax\n"); fprintf (state->ofp, " mov edi, edx\n"); fprintf (state->ofp, " mul ebx\n"); - fprintf (state->ofp, " mov esi, edx\n"); - fprintf (state->ofp, " mov edx, edi\n"); - fprintf (state->ofp, " imul edx, ebx\n"); - fprintf (state->ofp, " add edx, esi\n"); + + fprintf (state->ofp, " push eax\n"); + fprintf (state->ofp, " mov eax, esi\n"); + + fprintf (state->ofp, " imul eax, ecx\n"); + fprintf (state->ofp, " imul edi, ebx\n"); + + fprintf (state->ofp, " add edx, eax\n"); + fprintf (state->ofp, " add edx, edi\n"); + + fprintf (state->ofp, " pop eax\n"); break; case TOK_BSLASH: case TOK_SLASHEQ: @@ -12634,10 +12641,17 @@ static void emit_assignment_binary_op64 (enum token_kind op, int is_unsigned) { fprintf (state->ofp, " movl %%eax, %%esi\n"); fprintf (state->ofp, " movl %%edx, %%edi\n"); fprintf (state->ofp, " mull %%ebx\n"); - fprintf (state->ofp, " movl %%edx, %%esi\n"); - fprintf (state->ofp, " movl %%edi, %%edx\n"); - fprintf (state->ofp, " imull %%ebx, %%edx\n"); - fprintf (state->ofp, " addl %%esi, %%edx\n"); + + fprintf (state->ofp, " pushl %%eax\n"); + fprintf (state->ofp, " movl %%esi, %%eax\n"); + + fprintf (state->ofp, " imull %%ecx, %%eax\n"); + fprintf (state->ofp, " imull %%ebx, %%edi\n"); + + fprintf (state->ofp, " addl %%eax, %%edx\n"); + fprintf (state->ofp, " addl %%edi, %%edx\n"); + + fprintf (state->ofp, " popl %%eax\n"); break; case TOK_BSLASH: case TOK_SLASHEQ: @@ -24123,6 +24137,41 @@ static void emit_load_assignment_rhs_expression_to_reg (const char *reg) { } + /* + * A 32-bit context can still contain a 64-bit integer expression whose + * final value is converted down to int/long/pointer. For example: + * + * (uint32_t)(checksum >> 32) + * + * The old path entered the 32-bit expression parser, loaded only the low + * word of checksum, and then emitted a 32-bit SHR. Counts of 32/48 are + * masked by x86, so the generated code used the wrong half of the value + * and produced repeated checksum chunks. Evaluate such expressions with + * the 64-bit parser first, then keep the low result word as the normal C + * conversion to the requested 32-bit context. + */ + if (current_expression_mentions_64bit_symbol_now () || + tok.kind == TOK_CLLONG || tok.kind == TOK_CULLONG || + source_starts_64bit_integer_cast_now (tok.start) || + source_starts_64bit_integer_cast_now (tok.caret)) { + + int is_unsigned = rhs_current_operand_is_unsigned_now (); + emit_load_assignment_rhs_expression_to_pair ("eax", "edx", is_unsigned); + + if (state->ofp && strcmp (reg, "eax") != 0) { + + if (state->syntax & ASM_SYNTAX_INTEL) { + fprintf (state->ofp, " mov %s, eax\n", reg); + } else { + fprintf (state->ofp, " movl %%eax, %%%s\n", reg); + } + + } + + return; + + } + emit_load_assignment_compare_expression_to_reg (reg); for (;;) { diff --git a/int64.c b/int64.c index dc100f8..257524e 100644 --- a/int64.c +++ b/int64.c @@ -12,20 +12,14 @@ extern unsigned long get_line_number (void); void add64 (int64_s *a, int64_s b) { - unsigned long raw_sum, carry; + unsigned long al = a->low & U32_MASK; + unsigned long bl = b.low & U32_MASK; - /* 1. Calculate the raw result */ - raw_sum = a->low + b.low; - - /* 2. Manually force 32-bit wrap around using a mask */ - /* This ensures that any "1" that would be in the 33rd bit is stripped */ - a->low = raw_sum & 0xFFFFFFFFUL; - - /* 3. Extract the carry: if raw_sum > 0xFFFFFFFF, we had a carry */ - carry = (raw_sum > 0xFFFFFFFFUL) ? 1 : 0; - - /* 4. Update the high part */ - a->high = (a->high + b.high + carry) & 0xFFFFFFFFUL; + unsigned long raw_sum = al + bl; + unsigned long carry = (raw_sum >> 32) & 1UL; + + a->low = raw_sum & U32_MASK; + a->high = ((a->high & U32_MASK) + (b.high & U32_MASK) + carry) & U32_MASK; } @@ -47,7 +41,7 @@ void mul64 (int64_s *a, int64_s b) { unsigned long mid = (p0 >> 16) + (p1 & 0xFFFF) + (p2 & 0xFFFF); /* Store the new low 32 bits */ - unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF)); + unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF)) & U32_MASK; /* 2. Calculate the high word */ /* Start with the carries/high-bits from the 32x32 multiply */ @@ -60,8 +54,8 @@ void mul64 (int64_s *a, int64_s b) { new_high += (b.high * a->low); /* Final Assignment */ - a->low = new_low; - a->high = new_high; + a->low = new_low & U32_MASK; + a->high = new_high & U32_MASK; } @@ -260,7 +254,7 @@ void mod64 (int64_s *a, int64_s b) { void zext64 (int64_s *dest, unsigned long input) { - dest->low = input; + dest->low = input & U32_MASK; dest->high = 0; /* Clear the upper bits */ } @@ -728,15 +722,16 @@ void parse_string_to_i64 (int64_s *val, const char *str) { val->high = (val->high * base) + carry2; /* Finalize low part */ - val->low = ((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF); + val->low = (((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF)) & U32_MASK; + val->high &= U32_MASK; /* Add the new digit */ old_low = val->low; - val->low += (unsigned long) digit; + val->low = (val->low + (unsigned long) digit) & U32_MASK; /* Handle carry from addition into the high word */ if (val->low < old_low) { - val->high++; + val->high = (val->high + 1) & U32_MASK; } str++;