*/
if (op == TOK_LSH || op == TOK_RSH || op == TOK_LSHEQ || op == TOK_RSHEQ) {
+ /*
+ * AMD64 64-bit scalar expressions live in RAX, not RDX:RAX.
+ * Keep only the real left operand while the shift count is parsed.
+ */
emit_push_reg_now ("rax");
- emit_push_reg_now ("rdx");
-
emit_load_assignment_binary_expression_to_reg ("rbx");
if (state->syntax & ASM_SYNTAX_INTEL) {
fprintf (state->ofp, " xorq %%rcx, %%rcx\n");
}
- emit_pop_reg_now ("rdx");
emit_pop_reg_now ("rax");
} else {
/*
- * The generic 64-bit RHS loader uses EAX:EDX as scratch even
- * when asked to leave the final value in EBX:ECX. Preserve the
- * left operand around RHS evaluation; otherwise expressions such
- * as:
- *
- * result &= (((address_type)1) << n) - 1
- *
- * end up applying the operator to the RHS twice, because the
- * computed mask clobbers the original result in EAX:EDX.
+ * Native AMD64 64-bit binary ops use RAX and RBX. Do not route
+ * the RHS through RAX:RDX and then copy RDX into RCX; that is the
+ * old i386 pair model leaking into the AMD64 backend.
*/
emit_push_reg_now ("rax");
- emit_push_reg_now ("rdx");
emit_load_assignment_rhs_to_pair ("rbx", "rcx");
-
- emit_pop_reg_now ("rdx");
emit_pop_reg_now ("rax");
}
}
/*
- * Compound assignments need the complete RHS expression.
- * Using emit_load_assignment_rhs_to_pair() only consumes one
- * primary operand, so e.g.
+ * AMD64 uint64_t is a native qword, not an i386-style
+ * EDX:EAX pair. Preserve only the LHS qword in RAX and load
+ * the complete RHS directly into the native RHS register RBX.
*
- * final_value += symbol->frag->address + left_value;
+ * Using RAX:RDX as a temporary RHS pair created stale RDX
+ * saves/restores and useless MOV RCX,RDX instructions in code
+ * such as:
*
- * leaves the second + operand for the statement parser and
- * reports "expected ;". Evaluate the full RHS in rax:rdx,
- * copy it to the RHS pair rbx:rcx, then restore the original
- * LHS value before applying the compound operator.
+ * h ^= *p++;
+ * h *= UINT64_C (...);
*/
emit_push_reg_now ("rax");
- emit_push_reg_now ("rdx");
-
- emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
- emit_mov_reg_to_reg_now ("rbx", "rax");
- emit_mov_reg_to_reg_now ("rcx", "rdx");
-
- emit_pop_reg_now ("rdx");
+ emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
emit_pop_reg_now ("rax");
emit_preserve_assignment64_regs (op);
}
/*
- * Compound assignments need the complete RHS expression.
- * Using emit_load_assignment_rhs_to_pair() only consumes one
- * primary operand, so e.g.
+ * AMD64 uint64_t is a native qword, not an i386-style
+ * EDX:EAX pair. Preserve only the LHS qword in RAX and load
+ * the complete RHS directly into the native RHS register RBX.
*
- * final_value += symbol->frag->address + left_value;
+ * Using RAX:RDX as a temporary RHS pair created stale RDX
+ * saves/restores and useless MOV RCX,RDX instructions in code
+ * such as:
*
- * leaves the second + operand for the statement parser and
- * reports "expected ;". Evaluate the full RHS in rax:rdx,
- * copy it to the RHS pair rbx:rcx, then restore the original
- * LHS value before applying the compound operator.
+ * h ^= *p++;
+ * h *= UINT64_C (...);
*/
emit_push_reg_now ("rax");
- emit_push_reg_now ("rdx");
-
- emit_load_assignment_rhs_expression_to_pair ("rax", "rdx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
- emit_mov_reg_to_reg_now ("rbx", "rax");
- emit_mov_reg_to_reg_now ("rcx", "rdx");
-
- emit_pop_reg_now ("rdx");
+ emit_load_assignment_rhs_expression_to_pair ("rbx", "rcx", lhs ? lhs->is_unsigned : get_global_symbol_unsigned (name));
emit_pop_reg_now ("rax");
emit_preserve_assignment64_regs (op);
fprintf (state->ofp, " mov esi, eax\n");
fprintf (state->ofp, " mov edi, edx\n");
fprintf (state->ofp, " mul ebx\n");
- fprintf (state->ofp, " mov esi, edx\n");
- fprintf (state->ofp, " mov edx, edi\n");
- fprintf (state->ofp, " imul edx, ebx\n");
- fprintf (state->ofp, " add edx, esi\n");
+
+ fprintf (state->ofp, " push eax\n");
+ fprintf (state->ofp, " mov eax, esi\n");
+
+ fprintf (state->ofp, " imul eax, ecx\n");
+ fprintf (state->ofp, " imul edi, ebx\n");
+
+ fprintf (state->ofp, " add edx, eax\n");
+ fprintf (state->ofp, " add edx, edi\n");
+
+ fprintf (state->ofp, " pop eax\n");
break;
case TOK_BSLASH: case TOK_SLASHEQ:
fprintf (state->ofp, " movl %%eax, %%esi\n");
fprintf (state->ofp, " movl %%edx, %%edi\n");
fprintf (state->ofp, " mull %%ebx\n");
- fprintf (state->ofp, " movl %%edx, %%esi\n");
- fprintf (state->ofp, " movl %%edi, %%edx\n");
- fprintf (state->ofp, " imull %%ebx, %%edx\n");
- fprintf (state->ofp, " addl %%esi, %%edx\n");
+
+ fprintf (state->ofp, " pushl %%eax\n");
+ fprintf (state->ofp, " movl %%esi, %%eax\n");
+
+ fprintf (state->ofp, " imull %%ecx, %%eax\n");
+ fprintf (state->ofp, " imull %%ebx, %%edi\n");
+
+ fprintf (state->ofp, " addl %%eax, %%edx\n");
+ fprintf (state->ofp, " addl %%edi, %%edx\n");
+
+ fprintf (state->ofp, " popl %%eax\n");
break;
case TOK_BSLASH: case TOK_SLASHEQ:
}
+ /*
+ * A 32-bit context can still contain a 64-bit integer expression whose
+ * final value is converted down to int/long/pointer. For example:
+ *
+ * (uint32_t)(checksum >> 32)
+ *
+ * The old path entered the 32-bit expression parser, loaded only the low
+ * word of checksum, and then emitted a 32-bit SHR. x86 masks a 32-bit
+ * shift count to its low five bits (32 acts as 0, 48 as 16), so the
+ * generated code used the wrong half of the value and produced repeated
+ * checksum chunks. Evaluate such expressions with
+ * conversion to the requested 32-bit context.
+ */
+ if (current_expression_mentions_64bit_symbol_now () ||
+ tok.kind == TOK_CLLONG || tok.kind == TOK_CULLONG ||
+ source_starts_64bit_integer_cast_now (tok.start) ||
+ source_starts_64bit_integer_cast_now (tok.caret)) {
+
+ int is_unsigned = rhs_current_operand_is_unsigned_now ();
+ emit_load_assignment_rhs_expression_to_pair ("eax", "edx", is_unsigned);
+
+ if (state->ofp && strcmp (reg, "eax") != 0) {
+
+ if (state->syntax & ASM_SYNTAX_INTEL) {
+ fprintf (state->ofp, " mov %s, eax\n", reg);
+ } else {
+ fprintf (state->ofp, " movl %%eax, %%%s\n", reg);
+ }
+
+ }
+
+ return;
+
+ }
+
emit_load_assignment_compare_expression_to_reg (reg);
for (;;) {
+ /*
+ * Add b to *a in place, treating low/high as 32-bit words stored in
+ * unsigned long. Every operand and result is masked with U32_MASK so the
+ * words stay within 32 bits even if unsigned long is wider.
+ */
void add64 (int64_s *a, int64_s b) {
- unsigned long raw_sum, carry;
+ unsigned long al = a->low & U32_MASK;
+ unsigned long bl = b.low & U32_MASK;
- /* 1. Calculate the raw result */
- raw_sum = a->low + b.low;
-
- /* 2. Manually force 32-bit wrap around using a mask */
- /* This ensures that any "1" that would be in the 33rd bit is stripped */
- a->low = raw_sum & 0xFFFFFFFFUL;
-
- /* 3. Extract the carry: if raw_sum > 0xFFFFFFFF, we had a carry */
- carry = (raw_sum > 0xFFFFFFFFUL) ? 1 : 0;
-
- /* 4. Update the high part */
- a->high = (a->high + b.high + carry) & 0xFFFFFFFFUL;
+ /*
+ * Detect the carry by comparison instead of (sum >> 32): shifting an
+ * unsigned long by 32 is undefined when the type is exactly 32 bits
+ * wide, and the wrapped sum has no 33rd bit to extract there. The
+ * 32-bit sum is smaller than an operand exactly when it wrapped.
+ */
+ unsigned long low_sum = (al + bl) & U32_MASK;
+ unsigned long carry = (low_sum < al) ? 1UL : 0UL;
+
+ a->low = low_sum;
+ a->high = ((a->high & U32_MASK) + (b.high & U32_MASK) + carry) & U32_MASK;
}
unsigned long mid = (p0 >> 16) + (p1 & 0xFFFF) + (p2 & 0xFFFF);
/* Store the new low 32 bits */
- unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF));
+ unsigned long new_low = ((mid << 16) | (p0 & 0xFFFF)) & U32_MASK;
/* 2. Calculate the high word */
/* Start with the carries/high-bits from the 32x32 multiply */
new_high += (b.high * a->low);
/* Final Assignment */
- a->low = new_low;
- a->high = new_high;
+ a->low = new_low & U32_MASK;
+ a->high = new_high & U32_MASK;
}
void zext64 (int64_s *dest, unsigned long input) {
- dest->low = input;
+ dest->low = input & U32_MASK; /* Drop bits outside U32_MASK; presumably guards hosts where unsigned long is wider than 32 bits -- confirm */
dest->high = 0; /* Clear the upper bits */
}
val->high = (val->high * base) + carry2;
/* Finalize low part */
- val->low = ((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF);
+ val->low = (((res_mid & 0xFFFF) << 16) | (res_low & 0xFFFF)) & U32_MASK;
+ val->high &= U32_MASK;
/* Add the new digit */
old_low = val->low;
- val->low += (unsigned long) digit;
+ val->low = (val->low + (unsigned long) digit) & U32_MASK;
/* Handle carry from addition into the high word */
if (val->low < old_low) {
- val->high++;
+ val->high = (val->high + 1) & U32_MASK;
}
str++;