Linux System Calls
System call interface for x86-64 Linux.
What is a System Call?
A system call (syscall) is how userspace programs request services from the kernel: file I/O, networking, process control, memory management.
Your Program (userspace)
│
│ syscall instruction
▼
─────────────────────────────
│
▼
Kernel (ring 0)
│
▼
Hardware/Resources
What happens when the syscall instruction executes (steps 1–4 are performed by the CPU, steps 5–7 by the kernel):
1. Saves return address in RCX
2. Saves RFLAGS in R11
3. Switches to kernel mode
4. Jumps to kernel syscall handler
5. Kernel looks up syscall number in RAX
6. Executes the syscall
7. Returns result in RAX
Linux x86-64 Syscall Convention
╔═══════════════════════════════════════════════════════════════════╗
║ SYSCALL ARGUMENTS ║
╠═══════════════════════════════════════════════════════════════════╣
║ Register │ Purpose ║
║───────────┼────────────────────────────────────────────────────────║
║ RAX │ Syscall number (BEFORE) / Return value (AFTER) ║
║ RDI │ 1st argument ║
║ RSI │ 2nd argument ║
║ RDX │ 3rd argument ║
║ R10 │ 4th argument (NOT RCX - kernel uses it) ║
║ R8 │ 5th argument ║
║ R9 │ 6th argument ║
╠═══════════════════════════════════════════════════════════════════╣
║ RETURN VALUE ║
╠═══════════════════════════════════════════════════════════════════╣
║ RAX ≥ 0 │ Success (value depends on syscall) ║
║ RAX < 0 │ Error: RAX = -errno (e.g., -ENOENT = -2) ║
╠═══════════════════════════════════════════════════════════════════╣
║ CLOBBERED REGISTERS ║
╠═══════════════════════════════════════════════════════════════════╣
║ RCX, R11 │ Always destroyed by syscall instruction ║
║ RAX │ Contains return value ║
╚═══════════════════════════════════════════════════════════════════╝
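Note: This differs from the regular function calling convention, which passes the 4th argument in RCX. Syscalls use R10 instead, because the syscall instruction overwrites RCX with the return address.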
Common Syscall Numbers
| Number | Name | Description |
|---|---|---|
| 0 | read | Read from file descriptor |
| 1 | write | Write to file descriptor |
| 2 | open | Open file |
| 3 | close | Close file descriptor |
| 9 | mmap | Map memory |
| 11 | munmap | Unmap memory |
| 12 | brk | Change data segment size |
| 21 | access | Check file permissions |
| 33 | dup2 | Duplicate file descriptor |
| 39 | getpid | Get process ID |
| 57 | fork | Create child process |
| 59 | execve | Execute program |
| 60 | exit | Terminate process |
| 61 | wait4 | Wait for child process |
| 63 | uname | Get system info |
| 102 | getuid | Get user ID |
| 231 | exit_group | Exit all threads |
# List every syscall number defined in the headers installed on this system
grep __NR_ /usr/include/asm/unistd_64.h
# Alternatively, dump the table with ausyscall
ausyscall --dump
Syscall Examples
; ═══════════════════════════════════════════════════════════════════
; WRITE - Write to stdout
; ssize_t write(int fd, const void *buf, size_t count)
;   In:  rdi = fd, rsi = buf, rdx = count
;   Out: rax = bytes written, or a negative errno
; ═══════════════════════════════════════════════════════════════════
section .data
msg:    db      "Hello, World!", 10     ; message text followed by '\n'
len:    equ     $ - msg                 ; byte count: here minus start of msg

section .text
global _start
_start:
        mov     eax, 1                  ; rax = 1 (write); a 32-bit write zero-extends
        mov     edi, 1                  ; rdi = 1 (stdout)
        lea     rsi, [rel msg]          ; rsi = address of the message
        mov     edx, len                ; rdx = number of bytes to write
        syscall                         ; enter the kernel

        ; A negative result is -errno: branch away on the sign bit
        test    rax, rax
        js      error
; ═══════════════════════════════════════════════════════════════════
; READ - Read from stdin
; ssize_t read(int fd, void *buf, size_t count)
;   In:  rdi = fd, rsi = buf, rdx = count
;   Out: rax = bytes read, or a negative errno
; ═══════════════════════════════════════════════════════════════════
section .bss
buffer:         resb    256             ; input buffer
buffer_len:     equ     $ - buffer      ; = 256, derived from the reservation

section .text
read_input:
        mov     eax, 0                  ; rax = 0 (read)
        mov     edi, 0                  ; rdi = 0 (stdin)
        lea     rsi, [rel buffer]       ; rsi = destination buffer
        mov     edx, buffer_len         ; rdx = capacity of the buffer
        syscall
        ; rax now holds the number of bytes read (or a negative errno)
; ═══════════════════════════════════════════════════════════════════
; OPEN - Open a file
; int open(const char *pathname, int flags, mode_t mode)
;   In:  rdi = pathname, rsi = flags, rdx = mode
;   Out: rax = file descriptor, or a negative errno
; ═══════════════════════════════════════════════════════════════════
section .data
filename:       db      "/etc/passwd", 0        ; Null-terminated path

section .bss
fd:             resq    1                       ; descriptor returned by open

section .text
open_file:
        mov     eax, 2                  ; syscall: open
        lea     rdi, [rel filename]     ; pathname
        xor     esi, esi                ; flags: O_RDONLY = 0
        xor     edx, edx                ; mode (ignored for O_RDONLY)
        syscall

        ; rax = file descriptor, or -errno. Check BEFORE storing, otherwise
        ; a value such as -2 (ENOENT) would be saved as if it were a valid fd.
        test    rax, rax
        js      exit_error              ; open failed: terminate with status 1
        mov     [rel fd], rax           ; Save fd
; ═══════════════════════════════════════════════════════════════════
; EXIT - Terminate process
; void exit(int status)
;   In: rdi = status. The call does not return.
; ═══════════════════════════════════════════════════════════════════
exit_success:
        xor     edi, edi                ; status = 0
        mov     eax, 60                 ; rax = 60 (exit)
        syscall

exit_error:
        mov     edi, 1                  ; status = 1 (error)
        mov     eax, 60                 ; rax = 60 (exit)
        syscall
Complete Program: Hello World
; hello.asm - Minimal Linux x86-64 program
; Assemble: nasm -f elf64 hello.asm -o hello.o
; Link: ld hello.o -o hello
; Run: ./hello
;
; Prints one line to stdout with write(2), then terminates with exit(2).
section .data
msg:    db      "Hello, World!", 10     ; text plus trailing newline
len:    equ     $ - msg                 ; length computed at assembly time

section .text
global _start
_start:
        ; write(1, msg, len)
        mov     edi, 1                  ; fd = stdout
        lea     rsi, [rel msg]          ; buf = &msg
        mov     edx, len                ; count = len
        mov     eax, 1                  ; sys_write
        syscall

        ; exit(0)
        xor     edi, edi                ; status = 0
        mov     eax, 60                 ; sys_exit
        syscall
# Build and run
# Assemble to an ELF64 object, link it with ld alone (no libc), then execute it.
nasm -f elf64 hello.asm -o hello.o
ld hello.o -o hello
./hello
# The next line is the program's output, not a command:
Hello, World!
# Check file size (tiny!)
ls -l hello
# -rwxr-xr-x 1 user user 784 Feb 27 12:00 hello
# NOTE(review): the exact size depends on the linker version and flags — confirm on your toolchain.
# Compare to C hello world: ~16KB with glibc!
File Operations Example
; Copy file using syscalls
; Usage: ./copy source dest
;
; Exit status: 0 on success; 1 on a usage error or when any
; open/read/write syscall fails.
;
; Register roles inside the copy loop:
;   r12 = bytes of the current chunk still to be written
;   r13 = address of the next unwritten byte in buf
section .data
O_RDONLY:       equ 0
O_WRONLY:       equ 1
O_CREAT:        equ 64
O_TRUNC:        equ 512
BUF_SIZE:       equ 4096                ; 4KB transfer buffer

section .bss
buf:            resb BUF_SIZE
src_fd:         resq 1
dst_fd:         resq 1

section .text
global _start
_start:
        ; Initial process stack: [rsp] = argc, [rsp+8] = argv[0],
        ; [rsp+16] = argv[1], [rsp+24] = argv[2]
        mov     rdi, [rsp]              ; argc
        cmp     rdi, 3
        jne     usage_error             ; Need exactly 3 args (progname, src, dst)

        ; Open source file (argv[1])
        mov     eax, 2                  ; sys_open
        mov     rdi, [rsp + 16]         ; argv[1]
        mov     esi, O_RDONLY
        xor     edx, edx                ; mode is unused without O_CREAT
        syscall
        test    rax, rax
        js      open_error
        mov     [rel src_fd], rax

        ; Create/truncate destination file (argv[2])
        mov     eax, 2                  ; sys_open
        mov     rdi, [rsp + 24]         ; argv[2]
        mov     esi, O_WRONLY | O_CREAT | O_TRUNC
        mov     edx, 0644o              ; Mode: rw-r--r-- (octal!)
        syscall
        test    rax, rax
        js      open_error
        mov     [rel dst_fd], rax

.copy_loop:
        ; Read the next chunk from the source
        mov     eax, 0                  ; sys_read
        mov     rdi, [rel src_fd]
        lea     rsi, [rel buf]
        mov     edx, BUF_SIZE
        syscall
        test    rax, rax
        js      read_error
        jz      .done                   ; 0 bytes read = EOF

        mov     r12, rax                ; bytes remaining in this chunk
        lea     r13, [rel buf]          ; next byte to write

.write_loop:
        ; write() may accept fewer bytes than requested (short write), so
        ; keep writing until the whole chunk has been flushed.
        mov     eax, 1                  ; sys_write
        mov     rdi, [rel dst_fd]
        mov     rsi, r13
        mov     rdx, r12
        syscall
        test    rax, rax
        jle     write_error             ; negative = -errno; 0 = no progress
        add     r13, rax                ; advance past the bytes just written
        sub     r12, rax
        jnz     .write_loop             ; chunk not finished yet
        jmp     .copy_loop

.done:
        ; Close files
        mov     eax, 3                  ; sys_close
        mov     rdi, [rel src_fd]
        syscall
        mov     eax, 3
        mov     rdi, [rel dst_fd]
        syscall

        ; Exit success
        mov     eax, 60                 ; sys_exit
        xor     edi, edi
        syscall

        ; All failure paths share one exit with status 1.
open_error:
read_error:
write_error:
usage_error:
        mov     eax, 60                 ; sys_exit
        mov     edi, 1
        syscall
Memory Mapping (mmap)
; mmap - Map memory pages
; void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)

; Protection bits (prot argument)
PROT_READ:      equ 0x1
PROT_WRITE:     equ 0x2
PROT_EXEC:      equ 0x4

; Mapping flags (flags argument)
MAP_PRIVATE:    equ 0x02
MAP_ANON:       equ 0x20
; Allocate 4KB anonymous memory (like malloc)
;   In:    nothing
;   Out:   rax = pointer to the mapped memory, or a negative errno
;   Clobb: rdi, rsi, rdx, r10, r8, r9 (arguments); rcx, r11 (syscall)
allocate_page:
        xor     edi, edi                        ; addr   = NULL (kernel picks)
        mov     esi, 4096                       ; length = 4KB
        mov     edx, PROT_READ | PROT_WRITE     ; prot   = read + write
        mov     r10d, MAP_PRIVATE | MAP_ANON    ; flags  = private, anonymous
        mov     r8, -1                          ; fd     = -1 (anonymous)
        xor     r9d, r9d                        ; offset = 0
        mov     eax, 9                          ; sys_mmap
        syscall
        ret
; Free mapped memory
; munmap(void *addr, size_t length)
;   In:    rdi = address, rsi = length (passed through unchanged)
;   Out:   rax = result of the munmap syscall
;   Clobb: rcx, r11 (syscall)
free_page:
        mov     eax, 11                 ; sys_munmap
        syscall
        ret
; Map a file into memory (memory-mapped I/O)
;   In:    rdi = pointer to the NUL-terminated filename
;   Out:   rax = address of a read-only private mapping of the whole file,
;                or the negative errno of the first syscall that failed
;                (open, fstat or mmap)
;          rdx = file size in bytes (meaningful only on success)
;   Clobb: rdi, rsi, r8, r9, r10; rcx, r11 (syscall)
;   Stack: STAT_BUF_SIZE bytes for struct stat plus one saved register
;
; The descriptor is closed before returning; the mapping stays valid
; after close.
STAT_BUF_SIZE:  equ 144                 ; sizeof(struct stat) on x86-64
STAT_ST_SIZE:   equ 48                  ; offset of st_size in struct stat

map_file:
        push    r12                     ; r12 is callee-saved; it holds the fd
        sub     rsp, STAT_BUF_SIZE      ; struct stat on the stack

        ; open(filename, O_RDONLY)
        mov     eax, 2                  ; sys_open
        mov     esi, O_RDONLY
        xor     edx, edx                ; mode is unused without O_CREAT
        syscall
        test    rax, rax
        js      .out                    ; open failed: return -errno
        mov     r12, rax                ; Save fd

        ; fstat(fd, &st) to learn the file size
        mov     eax, 5                  ; sys_fstat
        mov     rdi, r12
        mov     rsi, rsp
        syscall
        test    rax, rax
        js      .close                  ; fstat failed: close fd, return -errno

        ; mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0)
        mov     eax, 9                  ; sys_mmap
        xor     edi, edi
        mov     rsi, [rsp + STAT_ST_SIZE]
        mov     edx, PROT_READ
        mov     r10d, MAP_PRIVATE
        mov     r8, r12                 ; fd
        xor     r9d, r9d                ; offset 0
        syscall

.close:
        ; rax is the value to return (mapping address or -errno). Park it in
        ; the first slot of the stat buffer (st_size at offset 48 is left
        ; intact) while close() runs.
        mov     [rsp], rax
        mov     eax, 3                  ; sys_close
        mov     rdi, r12
        syscall                         ; close result intentionally ignored
        mov     rax, [rsp]
        mov     rdx, [rsp + STAT_ST_SIZE]

.out:
        add     rsp, STAT_BUF_SIZE
        pop     r12
        ret
Process Control: fork/exec
; Fork and execute a command
; Child:  execve("/bin/ls", {"ls", "-la", NULL}, {NULL})
;         (like execl("/bin/ls", "ls", "-la", NULL), but with an EMPTY
;         environment instead of the caller's)
; Parent: waits for the child with wait4(pid, &status, 0, NULL)
;
;   Out:   rax = value returned by wait4 (child PID, or negative errno),
;                or the negative errno from fork if fork itself failed
;          edx = raw wait status written by wait4 (meaningful only when
;                wait4 succeeded)
;   Clobb: rdi, rsi, r10; rcx, r11 (syscall)
section .data
cmd:    db      "/bin/ls", 0
arg0:   db      "ls", 0
arg1:   db      "-la", 0
argv:   dq      arg0, arg1, 0           ; NULL-terminated array of pointers
envp:   dq      0                       ; Empty environment

section .text
fork_exec:
        ; fork()
        mov     eax, 57                 ; sys_fork
        syscall
        test    rax, rax
        js      fork_error
        jnz     parent                  ; Parent: rax = child PID

        ; Child continues here (rax = 0)
child:
        ; execve("/bin/ls", argv, envp)
        mov     eax, 59                 ; sys_execve
        lea     rdi, [rel cmd]          ; pathname
        lea     rsi, [rel argv]         ; argv array
        lea     rdx, [rel envp]         ; envp array
        syscall

        ; Only reached if execve failed: terminate the child with status 1
        mov     eax, 60                 ; sys_exit
        mov     edi, 1
        syscall

parent:
        ; wait4(pid, &status, 0, NULL)
        ; The PID goes straight from rax into the first argument register,
        ; so no callee-saved register has to be borrowed.
        mov     rdi, rax                ; pid
        sub     rsp, 8                  ; stack slot for the status word
        mov     rsi, rsp                ; &status
        xor     edx, edx                ; options: 0
        xor     r10d, r10d              ; rusage: NULL
        mov     eax, 61                 ; sys_wait4
        syscall
        mov     edx, [rsp]              ; hand the status back before freeing it
        add     rsp, 8
        ret

fork_error:
        ; rax still holds the negative errno from fork
        ret
Network: Socket Syscalls
; TCP server basics
; socket → bind → listen → accept → read/write → close

; Constants passed to socket()
AF_INET:        equ 0x2                 ; domain
SOCK_STREAM:    equ 0x1                 ; type
IPPROTO_TCP:    equ 0x6                 ; protocol

section .bss
sockfd:         resq 1                  ; result of create_socket
clientfd:       resq 1                  ; result of accept_connection
sockaddr:       resb 16                 ; struct sockaddr_in
section .text
; Create socket
;   Out:   rax = socket descriptor, or a negative errno; the same value is
;          stored in sockfd without being checked here
;   Clobb: rdi, rsi, rdx; rcx, r11 (syscall)
create_socket:
        mov     edi, AF_INET            ; domain: IPv4
        mov     esi, SOCK_STREAM        ; type: TCP
        mov     edx, IPPROTO_TCP        ; protocol
        mov     eax, 41                 ; sys_socket
        syscall
        mov     [rel sockfd], rax
        ret
; Bind to address
; struct sockaddr_in {
;   uint16_t sin_family;   // offset 0, 2 bytes
;   uint16_t sin_port;     // offset 2, 2 bytes (network byte order!)
;   uint32_t sin_addr;     // offset 4, 4 bytes
;   uint8_t  sin_zero[8];  // offset 8, 8 bytes padding
; }
;   Out:   rax = result of the bind syscall
;   Clobb: rdi, rsi, rdx; rcx, r11 (syscall)
bind_socket:
        ; Fill in sockaddr_in for 0.0.0.0:8080
        lea     rsi, [rel sockaddr]     ; rsi doubles as the 2nd bind argument
        mov     word [rsi], AF_INET     ; sin_family
        mov     word [rsi + 2], 0x901f  ; sin_port = htons(8080) = 0x1f90 → 0x901f
        mov     dword [rsi + 4], 0      ; sin_addr = INADDR_ANY

        mov     rdi, [rel sockfd]       ; socket to bind
        mov     edx, 16                 ; sizeof(sockaddr_in)
        mov     eax, 49                 ; sys_bind
        syscall
        ret
; Listen for connections
;   Out:   rax = result of the listen syscall
;   Clobb: rdi, rsi; rcx, r11 (syscall)
listen_socket:
        mov     rdi, [rel sockfd]       ; socket
        mov     esi, 5                  ; backlog
        mov     eax, 50                 ; sys_listen
        syscall
        ret
; Accept connection
;   Out:   rax = client descriptor, or a negative errno; the same value is
;          stored in clientfd without being checked here
;   Clobb: rdi, rsi, rdx; rcx, r11 (syscall)
accept_connection:
        mov     rdi, [rel sockfd]       ; listening socket
        xor     esi, esi                ; addr: NULL (don't care who connected)
        xor     edx, edx                ; addrlen: NULL
        mov     eax, 43                 ; sys_accept
        syscall
        mov     [rel clientfd], rax     ; Save client socket
        ret
; Send response
;   Out:   rax = bytes written to the client socket, or a negative errno
;   Clobb: rdi, rsi, rdx; rcx, r11 (syscall)
send_response:
        mov     rdi, [rel clientfd]             ; destination: client socket
        lea     rsi, [rel http_response]        ; buffer
        mov     edx, http_response_len          ; byte count
        mov     eax, 1                          ; sys_write (works for sockets too!)
        syscall
        ret
section .data
; Canned reply: status line, one header, blank line, body.
; 0x0D, 0x0A is the CR LF pair that terminates each header line.
http_response:
        db      "HTTP/1.0 200 OK", 0x0D, 0x0A
        db      "Content-Type: text/plain", 0x0D, 0x0A
        db      0x0D, 0x0A
        db      "Hello from assembly!", 0x0A
http_response_len: equ $ - http_response        ; total length in bytes
Error Handling Pattern
; Syscalls return negative errno on error
; Common errors:
;  -1 = EPERM (Operation not permitted)
;  -2 = ENOENT (No such file or directory)
;  -9 = EBADF (Bad file descriptor)
; -13 = EACCES (Permission denied)
; -14 = EFAULT (Bad address)
; -22 = EINVAL (Invalid argument)

; Error checking pattern
;   Opens `filename` read-only. On success stores the descriptor in `fd`
;   and returns it in rax. On failure exits the process with the positive
;   errno as its status.
;   Clobb: rdi, rsi; rcx, r11 (syscall)
syscall_with_check:
        mov     eax, 2                  ; sys_open
        lea     rdi, [rel filename]     ; RIP-relative, so it also links as PIE
        xor     esi, esi                ; O_RDONLY
        syscall

        ; Check for error
        test    rax, rax
        js      .error                  ; Jump if negative (sign bit set)

        ; Success path
        mov     [rel fd], rax
        ret

.error:
        ; RAX contains negative errno
        neg     rax                     ; Now RAX = positive errno
        ; Could print error message here
        mov     rdi, rax                ; Exit with errno as status
        mov     eax, 60                 ; sys_exit
        syscall
; Or using an unsigned range compare
; Error returns are exactly -4095..-1, which are the 4095 largest values
; when RAX is viewed as unsigned.
cmp rax, -4095
jae .error ; Unsigned >= -4095 means RAX holds -errno

; For syscalls whose successful result is a pointer (like mmap)
; A raw syscall never returns a bare -1 with errno set separately; that is
; the libc wrapper convention (e.g. mmap() returning MAP_FAILED). The raw
; mmap syscall reports failure as -errno like every other syscall, so use
; the same range compare; any value outside -4095..-1 is a valid result.
cmp rax, -4095
jae .error
Interfacing with C Library
; When linking with libc, use wrapper functions instead of raw syscalls
; They handle errno properly and follow the C ABI
; Example: calling libc printf() and exit() from assembly
extern write
extern exit
extern printf

section .data
fmt:    db      "The answer is: %d", 10, 0

section .text
global main
; int main(void) - prints one line with printf, then calls exit(0)
; ABI: System V AMD64
main:
        ; On entry rsp is 8 bytes off a 16-byte boundary (the caller's `call`
        ; pushed the return address). The ABI requires 16-byte alignment at
        ; every call site, and libc may rely on it, so realign first.
        sub     rsp, 8

        ; printf("The answer is: %d\n", 42)
        lea     rdi, [rel fmt]
        mov     esi, 42
        xor     eax, eax                ; al = 0: no vector-register arguments
        call    printf

        ; exit(0) - does not return, so rsp is never restored
        xor     edi, edi
        call    exit
# Assemble, then let gcc drive the link so libc is pulled in
# (pure-assembly programs are linked with ld directly instead)
nasm -f elf64 -o program.o program.asm
gcc -no-pie -o program program.o
# Alternative: link statically
gcc -static -o program program.o
Common Syscall Gotchas
; WRONG: Using RCX for 4th argument (kernel uses it)
; The regular function-call convention passes the 4th argument in RCX, but
; the syscall instruction overwrites RCX with the return address, so the
; value placed there is lost.
mov rcx, fourth_arg
syscall ; RCX is DESTROYED by syscall!
; CORRECT: Use R10 for 4th argument
; The syscall convention reads the 4th argument from R10 instead.
mov r10, fourth_arg
syscall
; WRONG: Forgetting that RCX and R11 are clobbered
; syscall saves the return address in RCX and RFLAGS in R11, overwriting
; whatever those registers held.
mov rcx, important_value
syscall
; RCX now contains return address, not important_value!
; CORRECT: Save registers if needed
; Push both before the syscall and pop them in reverse order afterwards.
push rcx
push r11
syscall
pop r11
pop rcx
; WRONG: Treating error like success
; The result is stored without testing whether it is a negative errno.
mov rax, 2 ; open
syscall
mov [fd], rax ; Might be storing -2 (ENOENT)!
; CORRECT: Check for error
; (This fragment shows only the check; RAX and the argument registers are
; assumed to be set up as in the WRONG example above it.)
syscall
test rax, rax
js handle_error ; sign bit set: RAX holds -errno, do not store it
mov [fd], rax
; WRONG: Passing pointer to local variable after function returns
; my_func reserves 16 bytes on its own stack frame, copies the 8-byte value
; in RDI into that slot, and hands a pointer to the slot to async_operation.
; The slot is released before my_func returns, so a pointer kept by
; async_operation would dangle.
; NOTE(review): if my_func is entered via `call`, rsp is not 16-byte aligned
; at the `call async_operation` below — confirm whether that matters for this
; illustration.
my_func:
sub rsp, 16
mov [rsp], rdi ; Copy the 8-byte value in RDI into the local stack slot
mov rdi, rsp ; Pass pointer
call async_operation ; If this stores pointer for later, WRONG!
add rsp, 16 ; Stack space is gone!
ret
; WRONG: Port written in host byte order
mov word [port], 8080                                   ; Stores as 0x1F90 (wrong order!)
; CORRECT: Swap the two bytes to get network (big-endian) order
mov word [port], ((8080 & 0xFF) << 8) | (8080 >> 8)     ; = 0x901F = htons(8080)