Arithmetic & Logic Operations

Mathematical and logical operations in x86-64 assembly.

Reference

Basic Arithmetic

; ADDITION
add rax, rbx        ; rax = rax + rbx
add rax, 5          ; rax = rax + 5
add rax, [rbx]      ; rax = rax + *rbx

; ADD with carry (for multi-precision)
adc rax, rbx        ; rax = rax + rbx + CF (carry flag)

; INCREMENT
inc rax             ; rax++ (doesn't affect CF, unlike add rax, 1)

; SUBTRACTION
sub rax, rbx        ; rax = rax - rbx
sub rax, 5          ; rax = rax - 5

; SUBTRACT with borrow
sbb rax, rbx        ; rax = rax - rbx - CF

; DECREMENT
dec rax             ; rax-- (doesn't affect CF)

; NEGATION (two's complement)
neg rax             ; rax = -rax (equivalent to: rax = 0 - rax)

; COMPARISON (subtract without storing result)
cmp rax, rbx        ; Compute rax - rbx, set flags, discard result
cmp rax, 5          ; Compare rax to immediate
cmp BYTE PTR [rbx], 0  ; Compare memory to zero

Math Connection:

Addition carries work exactly like decimal (shown for 8-bit values, so a carry means the sum exceeds 0xFF):
    47          0x2F
  + 85        + 0x55
  ----        ------
   132          0x84  (no carry)

    255         0xFF
  +   1       + 0x01
  ----        ------
   256          0x100 → 0x00 with CF=1 (carry)

Multiplication

; SIGNED multiplication (most common)
imul rax, rbx       ; rax = rax * rbx (truncated to 64 bits)
imul rax, rbx, 5    ; rax = rbx * 5
imul rax, [rbx]     ; rax = rax * *rbx

; One-operand form (full precision result)
; Multiplies RAX by operand, stores 128-bit result in RDX:RAX
imul rbx            ; RDX:RAX = RAX * RBX (signed)

; UNSIGNED multiplication
mul rbx             ; RDX:RAX = RAX * RBX (unsigned)

; Example: full 128-bit unsigned product
mov rax, 1000000000     ; 1 billion (10^9)
mov rbx, 1000000000     ; 1 billion (10^9)
mul rbx                 ; RDX:RAX = 10^18
; RAX = low 64 bits, RDX = high 64 bits
; Here 10^18 < 2^64, so the whole product fits in RAX and RDX = 0.
; MUL still overwrites RDX, so never keep a live value there.

; Quick multiplications using shifts and LEA
shl rax, 1          ; rax *= 2
shl rax, 3          ; rax *= 8
lea rax, [rax*2]    ; rax *= 2
lea rax, [rax + rax*2]  ; rax *= 3
lea rax, [rax + rax*4]  ; rax *= 5
lea rax, [rax*8 + rax]  ; rax *= 9

Math Connection: Bit shifting is multiplication/division by powers of 2.

Decimal:  25 × 10 = 250   (shift left one decimal place)
Binary:   25 × 2  = 50    (shift left one bit)
          0b11001 << 1 = 0b110010 = 50

Division

; Division is complex because it needs 128-bit dividend

; SIGNED division
; Divides RDX:RAX by operand
; Quotient in RAX, remainder in RDX
mov rax, 17         ; Dividend
cqo                 ; Sign-extend RAX into RDX (RDX:RAX = signed RAX)
mov rbx, 5          ; Divisor
idiv rbx            ; RAX = 17 / 5 = 3, RDX = 17 % 5 = 2

; UNSIGNED division
mov rax, 17
xor edx, edx        ; Zero-extend (RDX = 0)
mov rbx, 5
div rbx             ; RAX = 3, RDX = 2

; Common pattern for signed division
; IDIV truncates toward zero, so the remainder takes the sign of the dividend.
signed_divide:
    mov rax, [dividend]
    cqo                 ; CRITICAL: Sign-extend to RDX:RAX
    idiv QWORD PTR [divisor]
    ; RAX = quotient
    ; RDX = remainder

; Common pattern for unsigned division
unsigned_divide:
    mov rax, [dividend]
    xor edx, edx        ; CRITICAL: Zero RDX (the high 64 bits of the dividend)
    div QWORD PTR [divisor]
    ; RAX = quotient
    ; RDX = remainder

; DANGER: Division by zero causes exception (crash)
; DANGER: Overflow if quotient doesn't fit in RAX (e.g., huge RDX:RAX)
;         The same exception is raised for signed INT64_MIN / -1, because
;         the quotient +2^63 is not representable in RAX.

Sign extension instructions:

; Convert smaller signed to larger
cbw                 ; AL → AX (byte to word)
cwde                ; AX → EAX (word to double)
cdqe                ; EAX → RAX (double to quad)
cqo                 ; RAX → RDX:RAX (quad to octuple, for division)

Bitwise Operations

; AND - Both bits must be 1
and rax, rbx        ; rax = rax & rbx
and rax, 0xFF       ; Mask: keep only low byte
and rax, -16        ; Clear low 4 bits (align down to 16)
                    ; A 64-bit AND takes a sign-extended imm32, so the mask is
                    ; written as -16 (0xFFFFFFFFFFFFFFF0); 0xFFFFFFF0 does not
                    ; fit and would otherwise mean "also clear bits 63:32"

; OR - Either bit can be 1
or rax, rbx         ; rax = rax | rbx
or rax, 0x80        ; Set bit 7

; XOR - Bits must differ
xor rax, rbx        ; rax = rax ^ rbx
xor eax, eax        ; rax = 0 (zeroing idiom; a 32-bit write clears bits 63:32)
xor rax, 0xFF       ; Flip low 8 bits

; NOT - Flip all bits
not rax             ; rax = ~rax (one's complement; flags are left unchanged)

; TEST - AND without storing (just set flags)
test rax, rax       ; Is RAX zero? (ZF=1 if zero)
test rax, 1         ; Is RAX odd? (ZF=0 if odd)
test rax, 0x80      ; Is bit 7 set? (ZF=0 if set)

Subnet Mask Connection:

IP:   192.168.1.50   = 0xC0A80132
Mask: 255.255.255.0  = 0xFFFFFF00
─────────────────────────────────
AND:  192.168.1.0    = 0xC0A80100  ← Network address

Same operation in assembly:
    mov eax, 0xC0A80132   ; IP address
    and eax, 0xFFFFFF00   ; Apply mask
    ; EAX = 0xC0A80100

Bit Shifts and Rotates

; SHIFT LEFT - Multiply by 2^n, zeros fill from right
shl rax, 1          ; rax *= 2
shl rax, 4          ; rax *= 16
shl rax, cl         ; Shift by value in CL register (only the low 6 bits of CL
                    ; are used for a 64-bit operand)

; SHIFT RIGHT LOGICAL - Divide unsigned by 2^n, zeros fill from left
shr rax, 1          ; rax /= 2 (unsigned)
shr rax, 4          ; rax /= 16

; SHIFT RIGHT ARITHMETIC - Divide signed by 2^n, sign bit fills from left
sar rax, 1          ; rax /= 2 (signed, preserves sign)
; -8 >> 1 = -4 (correct with SAR)
; -8 >> 1 = huge positive (wrong with SHR)
; NOTE: SAR rounds toward negative infinity, while IDIV rounds toward zero:
; -7 >> 1 = -4 with SAR, but -7 / 2 = -3 with IDIV

; ROTATE - Bits wrap around
rol rax, 4          ; Rotate left 4 bits
ror rax, 4          ; Rotate right 4 bits

; Rotate through carry (for multi-precision)
rcl rax, 1          ; Rotate left through CF (CF acts as a 65th bit)
rcr rax, 1          ; Rotate right through CF (CF acts as a 65th bit)

Math Connection:

Left shift = multiply by 2:
    5 << 1 = 10       (0b0101 → 0b1010)
    5 << 2 = 20       (0b0101 → 0b10100)
    5 << 3 = 40       (0b0101 → 0b101000)

Right shift = divide by 2:
    40 >> 1 = 20
    40 >> 2 = 10
    40 >> 3 = 5

For x * 10:
    lea rax, [rax + rax*4]  ; rax = rax * 5
    shl rax, 1              ; rax = rax * 2 (total: * 10)

Flags in Detail

; Operations that SET flags:
;   add, sub, and, or, xor, cmp, test, inc, dec, neg, shl, shr, etc.

; Operations that DON'T affect flags:
;   mov, lea, push, pop, call, ret, jmp

; Zero Flag (ZF) - Set when result is zero
mov eax, 5
sub eax, 5          ; Result = 0, ZF = 1

xor eax, eax        ; ZF = 1 (result is zero)

; Sign Flag (SF) - Set when result is negative (MSB = 1)
mov al, 0x7F        ; 127
add al, 1           ; Result = 0x80 = 128 unsigned, -128 signed
                    ; SF = 1 (MSB is 1)

; Carry Flag (CF) - Unsigned overflow/underflow
mov al, 0xFF        ; 255
add al, 1           ; Result = 0, CF = 1 (carried out of 8 bits)

mov al, 0
sub al, 1           ; Result = 0xFF, CF = 1 (borrowed)

; Overflow Flag (OF) - Signed overflow
mov al, 127         ; Max positive signed byte
add al, 1           ; Result = 128 = -128 signed (WRONG!)
                    ; OF = 1 (signed overflow)

mov al, -128        ; Min negative signed byte
sub al, 1           ; Result = 127 (WRONG!)
                    ; OF = 1 (signed underflow)

The key insight:

The CPU performs ONE operation: the result bits are identical for signed and unsigned operands, and it sets BOTH CF and OF from that single result.
CF tells you about unsigned interpretation.
OF tells you about signed interpretation.
YOU decide which to check based on your data type.

Comparisons and Conditional Jumps

; CMP performs subtraction without storing result
cmp rax, rbx        ; Compute rax - rbx, set flags

; Unsigned comparisons (use CF)
ja  label           ; Jump if Above (CF=0 and ZF=0)
jae label           ; Jump if Above or Equal (CF=0)
jb  label           ; Jump if Below (CF=1)
jbe label           ; Jump if Below or Equal (CF=1 or ZF=1)

; Signed comparisons (use SF and OF)
jg  label           ; Jump if Greater (ZF=0 and SF=OF)
jge label           ; Jump if Greater or Equal (SF=OF)
jl  label           ; Jump if Less (SF≠OF)
jle label           ; Jump if Less or Equal (ZF=1 or SF≠OF)

; Equality (works for both signed and unsigned)
je  label           ; Jump if Equal (ZF=1)
jne label           ; Jump if Not Equal (ZF=0)

; Zero/Sign tests
jz  label           ; Jump if Zero (same as JE)
jnz label           ; Jump if Not Zero (same as JNE)
js  label           ; Jump if Sign (SF=1, negative)
jns label           ; Jump if Not Sign (SF=0, positive or zero)

; Carry/Overflow tests
jc  label           ; Jump if Carry (CF=1)
jnc label           ; Jump if No Carry (CF=0)
jo  label           ; Jump if Overflow (OF=1)
jno label           ; Jump if No Overflow (OF=0)

Quick Reference:

Unsigned:  JA/JAE/JB/JBE   (Above/Below)
Signed:    JG/JGE/JL/JLE   (Greater/Less)
Either:    JE/JNE          (Equal/Not Equal)

WRONG: Using JG when comparing unsigned values
       (in a 32-bit compare, 0xFFFFFFFF is -1 to JG/JL, so it is LESS than 0 when signed, but ABOVE 0 with JA)

Control Flow Structures

// if (x > 10) { y = 1; } else { y = 0; }

; If-else
    cmp rax, 10         ; Compare x to 10
    jle .else_branch    ; If x <= 10, go to else
    mov rbx, 1          ; y = 1 (then branch)
    jmp .end_if
.else_branch:
    mov rbx, 0          ; y = 0 (else branch)
.end_if:

// while (x > 0) { x--; sum += x; }

; While loop
.while_start:
    cmp rax, 0          ; x > 0?
    jle .while_end      ; If not, exit loop
    dec rax             ; x--
    add rbx, rax        ; sum += x
    jmp .while_start    ; Repeat
.while_end:

// for (int i = 0; i < 10; i++) { sum += i; }

; For loop
    xor ecx, ecx        ; i = 0
    xor eax, eax        ; sum = 0
.for_loop:
    cmp ecx, 10         ; i < 10?
    jge .for_end        ; If not, exit
    add eax, ecx        ; sum += i
    inc ecx             ; i++
    jmp .for_loop
.for_end:

// switch (x) { case 1: ...; case 2: ...; default: ...; }

; Switch (simple version with comparisons)
    cmp rax, 1
    je .case_1
    cmp rax, 2
    je .case_2
    jmp .default

.case_1:
    ; Handle case 1
    jmp .switch_end
.case_2:
    ; Handle case 2
    jmp .switch_end
.default:
    ; Handle default
.switch_end:

Loop Optimization Patterns

; SLOWER: Testing at top of loop (extra compare and jump every iteration)
.loop:
    cmp rcx, 0
    jle .done           ; Not taken N times, taken once (on exit)
    ; ... loop body ...
    dec rcx
    jmp .loop           ; Always taken
.done:

; BETTER: Test at bottom (two fewer instructions in hot path: no cmp, no jmp)
; Note: the guard only skips the loop for RCX == 0; unlike the JLE version
; above, a negative count is not rejected here.
    test rcx, rcx       ; Check if already zero
    jz .done
.loop:
    ; ... loop body ...
    dec rcx
    jnz .loop           ; Branch taken N-1 times, not taken once
.done:

; Use LOOP instruction (rarely optimal, but simple)
    mov rcx, 10         ; Loop count
.loop:
    ; ... loop body ...
    loop .loop          ; Decrement RCX, jump if not zero
                        ; (RCX = 0 on entry wraps around: 2^64 iterations)

; Unrolling (do multiple iterations per loop)
; Sums 8-byte elements at RSI into RAX, four per pass.
; Assumes RCX (element count) is a nonzero multiple of 4: otherwise SUB never
; lands exactly on zero and JNZ keeps looping past the end of the data.
.loop:
    ; Iteration 1
    add rax, [rsi]
    add rsi, 8
    ; Iteration 2
    add rax, [rsi]
    add rsi, 8
    ; Iteration 3
    add rax, [rsi]
    add rsi, 8
    ; Iteration 4
    add rax, [rsi]
    add rsi, 8
    sub rcx, 4
    jnz .loop

Conditional Set (SETcc)

; Set byte to 1 or 0 based on flags
; Useful for boolean expressions without branches

cmp rax, rbx
sete al             ; AL = 1 if equal, 0 otherwise
setne al            ; AL = 1 if not equal
setg al             ; AL = 1 if greater (signed)
setl al             ; AL = 1 if less (signed)
seta al             ; AL = 1 if above (unsigned)
setb al             ; AL = 1 if below (unsigned)

; Convert to full register (zero-extend)
movzx eax, al       ; EAX = 0 or 1

; Example: return (x > y)
cmp rdi, rsi        ; Compare x and y
setg al             ; AL = (x > y)
movzx eax, al       ; Return value in EAX
ret

CMOVcc - Conditional Move (branchless):

; Avoid branch misprediction with conditional move
cmp rax, rbx
cmovg rax, rcx      ; If greater, rax = rcx (else unchanged)
cmovl rax, rcx      ; If less, rax = rcx
cmove rax, rcx      ; If equal, rax = rcx

; Example: max(a, b)
mov rax, rdi        ; rax = a
cmp rdi, rsi        ; Compare a and b
cmovl rax, rsi      ; If a < b, rax = b
ret                 ; Return max

; Equivalent C (without branch):
; return (a < b) ? b : a;

Advanced Arithmetic

; ABSOLUTE VALUE
; abs(x) = (x < 0) ? -x : x
mov rax, rdi        ; rax = x
mov rdx, rdi        ; rdx = x
sar rdx, 63         ; rdx = -1 (all ones) if negative, 0 otherwise
xor rax, rdx        ; If negative: flip all bits (XOR with 0 is a no-op)
sub rax, rdx        ; If negative: add 1 (completes two's complement)
; rax = abs(x)
; NOTE: x = INT64_MIN comes back unchanged, since +2^63 does not fit in 64 bits

; SIGN FUNCTION
; sign(x) = -1, 0, or 1
; In: rdi = x    Out: rax = sign(x)    Clobbers: rdx, flags
mov rdx, rdi
sar rdx, 63         ; rdx = -1 if x < 0, 0 otherwise
xor eax, eax        ; rax = 0 (must come before TEST: XOR rewrites the flags)
test rdi, rdi       ; Set flags from x
setg al             ; rax = 1 if x > 0, 0 otherwise
add rax, rdx        ; Combine: -1, 0, or 1
; Correct for every input, including INT64_MIN. A version that negates x
; first breaks there, because -INT64_MIN overflows back to INT64_MIN.

; MIN/MAX without branches
; min(a, b)
mov rax, rdi
cmp rdi, rsi
cmovg rax, rsi      ; If a > b, rax = b
ret

; CLAMP to range [lo, hi]
; clamp(x, lo, hi) = max(lo, min(x, hi))
cmp rdi, rdx        ; x > hi?
cmovg rdi, rdx      ; If so, x = hi
cmp rdi, rsi        ; x < lo?
cmovl rdi, rsi      ; If so, x = lo
mov rax, rdi
ret

; BIT COUNTING
popcnt rax, rbx     ; RAX = number of 1 bits in RBX
lzcnt rax, rbx      ; RAX = number of leading zeros
tzcnt rax, rbx      ; RAX = number of trailing zeros

; BYTE SWAP (endianness conversion)
bswap rax           ; Reverse byte order in RAX
; 0x0102030405060708 → 0x0807060504030201

Floating Point (SSE/AVX)

; Floating point uses XMM registers (128-bit)
; Scalar operations use lower 32/64 bits

; Load/store
movss xmm0, [rax]       ; Load single float (32-bit)
movsd xmm0, [rax]       ; Load double (64-bit)
movss [rax], xmm0       ; Store single
movsd [rax], xmm0       ; Store double

; Arithmetic (scalar single/double)
addss xmm0, xmm1        ; xmm0 = xmm0 + xmm1 (single)
addsd xmm0, xmm1        ; xmm0 = xmm0 + xmm1 (double)
subss xmm0, xmm1
subsd xmm0, xmm1
mulss xmm0, xmm1
mulsd xmm0, xmm1
divss xmm0, xmm1
divsd xmm0, xmm1
sqrtss xmm0, xmm1       ; Square root

; Comparison
ucomiss xmm0, xmm1      ; Compare singles, set ZF/PF/CF (OF and SF are cleared)
ucomisd xmm0, xmm1      ; Compare doubles, set ZF/PF/CF (OF and SF are cleared)
; Then branch with the UNSIGNED-style conditions: ja/jae/jb/jbe/je/jne
; (the signed jg/jl give wrong answers here because SF and OF are always 0)
; Unordered (either operand is NaN) sets ZF=PF=CF=1: check it with jp first

; Conversion
cvtsi2sd xmm0, eax      ; Integer to double
cvttsd2si eax, xmm0     ; Double to integer (truncate toward zero, like a C cast)
cvtsd2si eax, xmm0      ; Double to integer (round per MXCSR; default nearest-even)
cvtss2sd xmm0, xmm1     ; Single to double
cvtsd2ss xmm0, xmm1     ; Double to single

; double average(double a, double b)
; In:       xmm0 = a, xmm1 = b
; Out:      xmm0 = (a + b) / 2
; Clobbers: rax, xmm1
average:
    addsd xmm0, xmm1    ; xmm0 = a + b
    mov eax, 2          ; rax = 2 (a 32-bit write zero-extends into RAX)
    cvtsi2sd xmm1, eax  ; xmm1 = 2.0 (b is no longer needed)
    divsd xmm0, xmm1    ; Halve the sum
    ret

Practice Exercises

; Exercise 1: What's the result and which flags are set?
mov al, 0x80            ; 128 unsigned, -128 signed
add al, 0x80            ; Result? Flags?
; Answer: AL = 0x00, ZF=1, CF=1, OF=1
; Unsigned: 128 + 128 = 256, doesn't fit → CF=1
; Signed: -128 + -128 = -256, doesn't fit → OF=1

; Exercise 2: Implement multiplication by 7 without MUL
; Answer:
lea rax, [rdi + rdi*2]  ; rax = 3 * x
lea rax, [rax + rdi*4]  ; rax = 3x + 4x = 7x

; Exercise 3: Implement is_power_of_two(x)
; A power of 2 has exactly one bit set: x != 0 and x & (x-1) == 0
; (the x != 0 check matters: 0 & (0 - 1) is also 0, but 0 is not a power of 2)
; In: rdi = x    Out: eax = 1 if x is a power of 2, else 0    Clobbers: rcx, flags
lea rax, [rdi - 1]      ; rax = x - 1 (LEA leaves the flags alone)
test rax, rdi           ; ZF = 1 if x & (x - 1) == 0
setz al                 ; AL = 1 if at most one bit is set
test rdi, rdi
setnz cl                ; CL = 1 if x != 0
and al, cl              ; AL = 1 only for a nonzero single-bit value
movzx eax, al
ret

; Exercise 4: Round up to next power of 2
; Using bit manipulation (assumes 0 < x <= 2^63; larger x wraps to 0)
; Idea: smear the highest set bit of x-1 into every lower position, then add 1.
; Each step must shift the UPDATED value, so RAX is reloaded from RDI every
; time; shifting one stale copy would leave gaps (e.g. x = 9 would give 15).
; In: rdi = x    Out: rax = smallest power of 2 >= x    Clobbers: rdi, flags
dec rdi                 ; x - 1 (so exact powers of 2 map to themselves)
mov rax, rdi
shr rax, 1
or rdi, rax             ; x |= x >> 1
mov rax, rdi
shr rax, 2
or rdi, rax             ; x |= x >> 2
mov rax, rdi
shr rax, 4
or rdi, rax             ; x |= x >> 4
mov rax, rdi
shr rax, 8
or rdi, rax             ; x |= x >> 8
mov rax, rdi
shr rax, 16
or rdi, rax             ; x |= x >> 16
mov rax, rdi
shr rax, 32
or rdi, rax             ; x |= x >> 32 (all bits below the top one are now set)
inc rdi                 ; Final result
mov rax, rdi
ret

; Exercise 5: What jump should you use?
; Comparing ages (unsigned, 0-150)
cmp eax, 21
jae can_drink           ; Use unsigned comparison

; Comparing temperatures (signed, -40 to 120)
cmp eax, 0
jl below_zero           ; Use signed comparison