From: Robert Pengelly <robertapengelly@hotmail.com>
Date: Fri, 26 Jun 2026 16:48:30 +0000 (+0100)
Subject: 64-bit fixes
X-Git-Url: https://git.candlhat.org/?a=commitdiff_plain;h=91ec845923d62310ff16557f665b2017a4df3dd9;p=scc.git

64-bit fixes
---

diff --git a/amd64.c b/amd64.c
index a2c7706..2c84230 100644
--- a/amd64.c
+++ b/amd64.c
@@ -27330,9 +27330,11 @@ static void emit_load_assignment_rhs_expression_to_pair (const char *lo, const c
          */
         if (op == TOK_LSH || op == TOK_RSH || op == TOK_LSHEQ || op == TOK_RSHEQ) {
         
+            /*
+             * AMD64 64-bit scalar expressions live in RAX, not RDX:RAX.
+             * Keep only the real left operand while the shift count is parsed.
+             */
             emit_push_reg_now ("rax");
-            emit_push_reg_now ("rdx");
-            
             emit_load_assignment_binary_expression_to_reg ("rbx");
             
             if (state->syntax & ASM_SYNTAX_INTEL) {
@@ -27341,28 +27343,18 @@ static void emit_load_assignment_rhs_expression_to_pair (const char *lo, const c
                 fprintf (state->ofp, "    xorq %%rcx, %%rcx\n");
             }
             
-            emit_pop_reg_now ("rdx");
             emit_pop_reg_now ("rax");
         
         } else {
         
             /*
-             * The generic 64-bit RHS loader uses EAX:EDX as scratch even
-             * when asked to leave the final value in EBX:ECX.  Preserve the
-             * left operand around RHS evaluation; otherwise expressions such
-             * as:
-             *
-             *     result &= (((address_type)1) << n) - 1
-             *
-             * end up applying the operator to the RHS twice, because the
-             * computed mask clobbers the original result in EAX:EDX.
+             * Native AMD64 64-bit binary ops use RAX and RBX.  Do not route
+             * the RHS through RAX:RDX and then copy RDX into RCX; that is the
+             * old i386 pair model leaking into the AMD64 backend.
              */
             emit_push_reg_now ("rax");
-            emit_push_reg_now ("rdx");
             
             emit_load_assignment_rhs_to_pair ("rbx", "rcx");
-            
-            emit_pop_reg_now ("rdx");
             emit_pop_reg_now ("rax");
         
         }
@@ -30774,26 +30766,20 @@ static int parse_identifier_assignment_statement (void) {
                 }
                 
                 /*
-                 * Compound assignments need the complete RHS expression.
-                 * Using emit_load_assignment_rhs_to_pair() only consumes one
-                 * primary operand, so e.g.
+                 * AMD64 uint64_t is a native qword, not an i386-style
+                 * EDX:EAX pair.  Preserve only the LHS qword in RAX and load
+                 * the complete RHS directly into the native RHS register RBX.
                  *
-                 *     final_value += symbol->frag->address + left_value;
+                 * Using RAX:RDX as a temporary RHS pair created stale RDX
+                 * saves/restores and useless MOV RCX,RDX instructions in code
+                 * such as:
                  *
-                 * leaves the second + operand for the statement parser and
-                 * reports "expected ;".  Evaluate the full RHS in rax:rdx,
-                 * copy it to the RHS pair rbx:rcx, then restore the original
-                 * LHS value before applying the compound operator.
+                 *     h ^= *p++;
+                 *     h *= UINT64_C (...);
                  */
                 emit_push_reg_now ("rax");
-                emit_push_reg_now ("rdx");
-                
-                emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
                 
-                emit_mov_reg_to_reg_now ("rbx", "rax");
-                emit_mov_reg_to_reg_now ("rcx", "rdx");
-                
-                emit_pop_reg_now ("rdx");
+                emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
                 emit_pop_reg_now ("rax");
                 
                 emit_preserve_assignment64_regs (op);
@@ -33548,26 +33534,20 @@ static void parse_for_header_expression_until (enum token_kind end_token) {
                 }
                 
                 /*
-                 * Compound assignments need the complete RHS expression.
-                 * Using emit_load_assignment_rhs_to_pair() only consumes one
-                 * primary operand, so e.g.
+                 * AMD64 uint64_t is a native qword, not an i386-style
+                 * EDX:EAX pair.  Preserve only the LHS qword in RAX and load
+                 * the complete RHS directly into the native RHS register RBX.
                  *
-                 *     final_value += symbol->frag->address + left_value;
+                 * Using RAX:RDX as a temporary RHS pair created stale RDX
+                 * saves/restores and useless MOV RCX,RDX instructions in code
+                 * such as:
                  *
-                 * leaves the second + operand for the statement parser and
-                 * reports "expected ;".  Evaluate the full RHS in rax:rdx,
-                 * copy it to the RHS pair rbx:rcx, then restore the original
-                 * LHS value before applying the compound operator.
+                 *     h ^= *p++;
+                 *     h *= UINT64_C (...);
                  */
                 emit_push_reg_now ("rax");
-                emit_push_reg_now ("rdx");
-                
-                emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
                 
-                emit_mov_reg_to_reg_now ("rbx", "rax");
-                emit_mov_reg_to_reg_now ("rcx", "rdx");
-                
-                emit_pop_reg_now ("rdx");
+                emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
                 emit_pop_reg_now ("rax");
                 
                 emit_preserve_assignment64_regs (op);
diff --git a/i386.c b/i386.c
index cfa5531..c6854f5 100644
--- a/i386.c
+++ b/i386.c
@@ -12514,10 +12514,17 @@ static void emit_assignment_binary_op64 (enum token_kind op, int is_unsigned) {
                 fprintf (state->ofp, "    mov esi, eax\n");
                 fprintf (state->ofp, "    mov edi, edx\n");
                 fprintf (state->ofp, "    mul ebx\n");
-                fprintf (state->ofp, "    mov esi, edx\n");
-                fprintf (state->ofp, "    mov edx, edi\n");
-                fprintf (state->ofp, "    imul edx, ebx\n");
-                fprintf (state->ofp, "    add edx, esi\n");
+                
+                fprintf (state->ofp, "    push eax\n");
+                fprintf (state->ofp, "    mov eax, esi\n");
+                
+                fprintf (state->ofp, "    imul eax, ecx\n");
+                fprintf (state->ofp, "    imul edi, ebx\n");
+                
+                fprintf (state->ofp, "    add edx, eax\n");
+                fprintf (state->ofp, "    add edx, edi\n");
+                
+                fprintf (state->ofp, "    pop eax\n");
                 break;
             
             case TOK_BSLASH:    case TOK_SLASHEQ:
@@ -12634,10 +12641,17 @@ static void emit_assignment_binary_op64 (enum token_kind op, int is_unsigned) {
                 fprintf (state->ofp, "    movl %%eax, %%esi\n");
                 fprintf (state->ofp, "    movl %%edx, %%edi\n");
                 fprintf (state->ofp, "    mull %%ebx\n");
-                fprintf (state->ofp, "    movl %%edx, %%esi\n");
-                fprintf (state->ofp, "    movl %%edi, %%edx\n");
-                fprintf (state->ofp, "    imull %%ebx, %%edx\n");
-                fprintf (state->ofp, "    addl %%esi, %%edx\n");
+                
+                fprintf (state->ofp, "    pushl %%eax\n");
+                fprintf (state->ofp, "    movl %%esi, %%eax\n");
+                
+                fprintf (state->ofp, "    imull %%ecx, %%eax\n");
+                fprintf (state->ofp, "    imull %%ebx, %%edi\n");
+                
+                fprintf (state->ofp, "    addl %%eax, %%edx\n");
+                fprintf (state->ofp, "    addl %%edi, %%edx\n");
+                
+                fprintf (state->ofp, "    popl %%eax\n");
                 break;
             
             case TOK_BSLASH:    case TOK_SLASHEQ:
@@ -24123,6 +24137,41 @@ static void emit_load_assignment_rhs_expression_to_reg (const char *reg) {
     
     }
     
+    /*
+     * A 32-bit context can still contain a 64-bit integer expression whose
+     * final value is converted down to int/long/pointer.  For example:
+     *
+     *     (uint32_t)(checksum >> 32)
+     *
+     * The old path entered the 32-bit expression parser, loaded only the low
+     * word of checksum, and then emitted a 32-bit SHR.  x86 masks 32-bit shift
+     * counts to five bits (32 becomes 0, 48 becomes 16), so the generated
+     * code operated on the wrong half of the value
+     * the 64-bit parser first, then keep the low result word as the normal C
+     * conversion to the requested 32-bit context.
+     */
+    if (current_expression_mentions_64bit_symbol_now () ||
+        tok.kind == TOK_CLLONG || tok.kind == TOK_CULLONG ||
+        source_starts_64bit_integer_cast_now (tok.start) ||
+        source_starts_64bit_integer_cast_now (tok.caret)) {
+    
+        int is_unsigned = rhs_current_operand_is_unsigned_now ();
+        emit_load_assignment_rhs_expression_to_pair ("eax", "edx", is_unsigned);
+        
+        if (state->ofp && strcmp (reg, "eax") != 0) {
+        
+            if (state->syntax & ASM_SYNTAX_INTEL) {
+                fprintf (state->ofp, "    mov %s, eax\n", reg);
+            } else {
+                fprintf (state->ofp, "    movl %%eax, %%%s\n", reg);
+            }
+        
+        }
+        
+        return;
+    
+    }
+    
     emit_load_assignment_compare_expression_to_reg (reg);
     
     for (;;) {
diff --git a/int64.c b/int64.c
index dc100f8..257524e 100644
--- a/int64.c
+++ b/int64.c
@@ -12,20 +12,14 @@ extern unsigned long get_line_number (void);
 
 void add64 (int64_s *a, int64_s b) {
 
-    unsigned long raw_sum, carry;
+    unsigned long al = a->low & U32_MASK;
+    unsigned long sum_low = (al + (b.low & U32_MASK)) & U32_MASK;
     
-    /* 1. Calculate the raw result */
-    raw_sum = a->low + b.low;
-
-    /* 2. Manually force 32-bit wrap around using a mask */
-    /* This ensures that any "1" that would be in the 33rd bit is stripped */
-    a->low = raw_sum & 0xFFFFFFFFUL;
-
-    /* 3. Extract the carry: if raw_sum > 0xFFFFFFFF, we had a carry */
-    carry = (raw_sum > 0xFFFFFFFFUL) ? 1 : 0;
-
-    /* 4. Update the high part */
-    a->high = (a->high + b.high + carry) & 0xFFFFFFFFUL;
+    /* Wrap-around test: correct even where unsigned long is only 32 bits. */
+    unsigned long carry = (sum_low < al) ? 1UL : 0UL;
+    
+    a->low = sum_low;
+    a->high = ((a->high & U32_MASK) + (b.high & U32_MASK) + carry) & U32_MASK;
 
 }
 
@@ -47,7 +41,7 @@ void mul64 (int64_s *a, int64_s b) {
     unsigned long mid = (p0 >> 16) + (p1 & 0xFFFF) + (p2 & 0xFFFF);
     
     /* Store the new low 32 bits */
-    unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF));
+    unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF)) & U32_MASK;
     
     /* 2. Calculate the high word */
     /* Start with the carries/high-bits from the 32x32 multiply */
@@ -60,8 +54,8 @@ void mul64 (int64_s *a, int64_s b) {
     new_high += (b.high * a->low);
     
     /* Final Assignment */
-    a->low = new_low;
-    a->high = new_high;
+    a->low = new_low & U32_MASK;
+    a->high = new_high & U32_MASK;
 
 }
 
@@ -260,7 +254,7 @@ void mod64 (int64_s *a, int64_s b) {
 
 void zext64 (int64_s *dest, unsigned long input) {
 
-    dest->low = input;
+    dest->low = input & U32_MASK;       /* Keep only the U32_MASK bits (presumably the low 32) */
     dest->high = 0;                     /* Clear the upper bits */
 
 }
@@ -728,15 +722,16 @@ void parse_string_to_i64 (int64_s *val, const char *str) {
         val->high = (val->high * base) + carry2;
         
         /* Finalize low part */
-        val->low = ((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF);
+        val->low = (((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF)) & U32_MASK;
+        val->high &= U32_MASK;
         
         /* Add the new digit */
         old_low = val->low;
-        val->low += (unsigned long) digit;
+        val->low = (val->low + (unsigned long) digit) & U32_MASK;
         
         /* Handle carry from addition into the high word */
         if (val->low < old_low) {
-            val->high++;
+            val->high = (val->high + 1) & U32_MASK;
         }
         
         str++;