Performance Tuning
Quick Reference
# System overview
top / htop / atop
vmstat 1
iostat -xz 1
sar -u 1 5
# CPU
mpstat -P ALL 1
perf top
turbostat
# Memory
free -h
vmstat -s
cat /proc/meminfo
# Disk I/O
iotop
iostat -xdz 1
blktrace /dev/sda
# Network
iftop
nethogs
ss -s
Performance Analysis Methodology
USE Method (Utilization, Saturation, Errors)
For each resource, check:
| Metric | Description | Tools |
|---|---|---|
| Utilization | Percentage of time resource is busy | mpstat, iostat, sar, free |
| Saturation | Degree to which work is queued | vmstat (r column), iostat (avgqu-sz), sar -q |
| Errors | Count of error events | dmesg, ethtool -S, /proc/net/dev |
Quick System Assessment
# 60-second analysis checklist
uptime # Load averages
dmesg | tail # Kernel errors
vmstat 1 5 # Overall stats
mpstat -P ALL 1 5 # CPU balance
pidstat 1 5 # Process CPU
iostat -xz 1 5 # Disk I/O
free -m # Memory
sar -n DEV 1 5 # Network I/O
sar -n TCP,ETCP 1 5 # TCP stats
top # Overview
CPU Performance
CPU Monitoring
# Overall CPU usage
top
htop
# Per-CPU statistics
mpstat -P ALL 1
# CPU usage by process
pidstat -u 1
# Real-time function profiling
perf top
# CPU frequency and turbo
turbostat --Summary --quiet
cat /proc/cpuinfo | grep MHz
# Load average
uptime
cat /proc/loadavg
Understanding Load Average
# Load average: 1-min, 5-min, 15-min
# Rule: load average / CPU count
# < 1.0 = OK
# 1.0-2.0 = acceptable
# > 2.0 = investigate
# Check CPU count
nproc
lscpu | grep "^CPU(s):"
# Example: 8 CPUs, load 4.0 = 50% utilized
# Example: 4 CPUs, load 8.0 = 200% (overloaded)
CPU Governor and Frequency
# Current governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
# Available governors
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors
# Set governor (all CPUs)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Using cpupower
cpupower frequency-info
cpupower frequency-set -g performance
# Persistent (systemd)
# /etc/systemd/system/cpupower.service
[Unit]
Description=CPU Power Management
After=multi-user.target
[Service]
Type=oneshot
ExecStart=/usr/bin/cpupower frequency-set -g performance
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target
Process Priority and Affinity
# Nice values (-20 to 19, lower = higher priority)
nice -n -10 command # Start with priority
renice -n -5 -p PID # Change running process
# CPU affinity
taskset -c 0,1 command # Run on CPUs 0,1
taskset -pc 0-3 PID # Set affinity for running process
# View process affinity
taskset -p PID
# Isolate CPUs from scheduler (kernel boot parameter)
# isolcpus=2,3
# Then pin specific processes to those CPUs
CPU Sysctl Tuning
# /etc/sysctl.d/99-cpu.conf
# Scheduler tuning
kernel.sched_migration_cost_ns = 5000000
kernel.sched_autogroup_enabled = 0
# For latency-sensitive workloads
kernel.sched_min_granularity_ns = 10000000
kernel.sched_wakeup_granularity_ns = 15000000
# Apply
sysctl -p /etc/sysctl.d/99-cpu.conf
Memory Performance
Memory Monitoring
# Overview
free -h
cat /proc/meminfo
# Detailed statistics
vmstat -s
# Memory usage by process
ps aux --sort=-%mem | head
top -o %MEM
# Page faults
sar -B 1
# Swap usage
swapon --show
cat /proc/swaps
vmstat 1 | awk '{print $7, $8}' # si, so columns
Understanding Memory Metrics
# Key /proc/meminfo fields
MemTotal # Total physical RAM
MemFree # Completely unused
MemAvailable # Available for allocation (includes reclaimable)
Buffers # Block device buffers
Cached # Page cache
SwapTotal # Total swap
SwapFree # Unused swap
Dirty # Waiting to be written to disk
Writeback # Being written to disk
Mapped # Memory mapped files
Slab # Kernel data structures
SReclaimable # Reclaimable slab memory
PageTables # Page table memory
Swap Configuration
# Check swappiness (0-100, higher = more aggressive)
cat /proc/sys/vm/swappiness
# Set swappiness
# For servers with plenty of RAM
echo 10 | sudo tee /proc/sys/vm/swappiness
# Persistent
echo "vm.swappiness = 10" | sudo tee -a /etc/sysctl.d/99-memory.conf
# Disable swap (not recommended for most cases)
swapoff -a
# Create swap file
dd if=/dev/zero of=/swapfile bs=1M count=4096
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile
# Persistent swap in /etc/fstab
/swapfile none swap sw 0 0
Memory Sysctl Tuning
# /etc/sysctl.d/99-memory.conf
# Swappiness (0-100)
vm.swappiness = 10
# VFS cache pressure (default 100)
vm.vfs_cache_pressure = 50
# Dirty page settings
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
vm.dirty_expire_centisecs = 3000
vm.dirty_writeback_centisecs = 500
# Overcommit settings
vm.overcommit_memory = 0
vm.overcommit_ratio = 50
# For large memory systems
vm.min_free_kbytes = 262144
# Zone reclaim mode (NUMA)
vm.zone_reclaim_mode = 0
# Apply
sysctl -p /etc/sysctl.d/99-memory.conf
Transparent Huge Pages
# Check THP status
cat /sys/kernel/mm/transparent_hugepage/enabled
# Disable THP (recommended for databases)
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/defrag
# Persistent (add to kernel parameters or use systemd)
# Kernel parameter: transparent_hugepage=never
# Or systemd service
cat > /etc/systemd/system/disable-thp.service << 'EOF'
[Unit]
Description=Disable Transparent Huge Pages
[Service]
Type=oneshot
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/enabled"
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target
EOF
systemctl enable disable-thp.service
NUMA Optimization
# Check NUMA topology
numactl --hardware
lscpu | grep NUMA
# View NUMA statistics
numastat
numastat -p PID
# Run process on specific NUMA node
numactl --cpunodebind=0 --membind=0 command
# Interleave memory across nodes
numactl --interleave=all command
# Check NUMA balancing
cat /proc/sys/kernel/numa_balancing
# Disable NUMA balancing (for some workloads)
echo 0 | sudo tee /proc/sys/kernel/numa_balancing
Disk I/O Performance
I/O Monitoring
# Per-device I/O statistics
iostat -xz 1
# Key metrics:
# r/s, w/s - reads/writes per second
# rkB/s, wkB/s - throughput
# await - average I/O wait time (ms)
# avgqu-sz - average queue length
# %util - device utilization
# I/O by process
iotop
pidstat -d 1
# Block I/O tracing
blktrace -d /dev/sda -o - | blkparse -i -
# Check for I/O wait
vmstat 1 | awk '{print $16}' # wa column
I/O Scheduler
# Check current scheduler
cat /sys/block/sda/queue/scheduler
# Available schedulers (kernel dependent)
# mq-deadline - Good for HDDs
# none/noop - Good for NVMe/SSDs (no scheduling needed)
# bfq - Good for desktop/interactive
# kyber - Good for fast SSDs
# Change scheduler
echo mq-deadline | sudo tee /sys/block/sda/queue/scheduler
# Persistent via udev rule
# /etc/udev/rules.d/60-scheduler.rules
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/scheduler}="mq-deadline"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", ATTR{queue/scheduler}="none"
ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/scheduler}="none"
Block Device Tuning
# Read-ahead (read_ahead_kb is in KiB)
cat /sys/block/sda/queue/read_ahead_kb
echo 256 | sudo tee /sys/block/sda/queue/read_ahead_kb
# Queue depth
cat /sys/block/sda/device/queue_depth
echo 64 | sudo tee /sys/block/sda/device/queue_depth
# Number of requests in queue
cat /sys/block/sda/queue/nr_requests
echo 256 | sudo tee /sys/block/sda/queue/nr_requests
# Disable NCQ (if causing issues)
echo 1 | sudo tee /sys/block/sda/device/queue_depth
Filesystem Tuning
# Mount options for ext4
# /etc/fstab
/dev/sda1 / ext4 defaults,noatime,nodiratime,discard 0 1
# noatime - Don't update access times
# nodiratime - Don't update directory access times
# discard - Enable TRIM for SSDs (or use fstrim.timer)
# Mount options for XFS
/dev/sda1 / xfs defaults,noatime,logbufs=8,logbsize=256k 0 1
# Enable periodic TRIM
systemctl enable fstrim.timer
# Manual TRIM
fstrim -av
# Check filesystem fragmentation (ext4)
e4defrag -c /path
# Defragment (ext4)
e4defrag /path
Network Performance
Network Monitoring
# Interface statistics
ip -s link show eth0
cat /proc/net/dev
# Real-time bandwidth
iftop
nethogs
nload
# Connection statistics
ss -s
# TCP statistics
netstat -s | grep -i tcp
cat /proc/net/snmp | grep Tcp
# Errors and drops
ethtool -S eth0 | grep -E "error|drop"
Network Sysctl Tuning
# /etc/sysctl.d/99-network.conf
# TCP buffer sizes (min, default, max)
net.core.rmem_default = 262144
net.core.wmem_default = 262144
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 262144 16777216
net.ipv4.tcp_wmem = 4096 262144 16777216
# Connection tracking
net.netfilter.nf_conntrack_max = 1048576
# TCP tuning
net.ipv4.tcp_fastopen = 3
net.ipv4.tcp_slow_start_after_idle = 0
net.ipv4.tcp_no_metrics_save = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.tcp_keepalive_time = 300
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_intvl = 15
# Connection queue
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 65535
net.ipv4.tcp_max_syn_backlog = 65535
# Port range
net.ipv4.ip_local_port_range = 1024 65535
# TIME_WAIT reuse
net.ipv4.tcp_tw_reuse = 1
# Congestion control
net.ipv4.tcp_congestion_control = bbr
net.core.default_qdisc = fq
# Apply
sysctl -p /etc/sysctl.d/99-network.conf
Enable BBR Congestion Control
# Check if BBR is available
modprobe tcp_bbr
lsmod | grep bbr
# Enable BBR
echo "tcp_bbr" | sudo tee -a /etc/modules-load.d/bbr.conf
cat >> /etc/sysctl.d/99-bbr.conf << 'EOF'
net.core.default_qdisc = fq
net.ipv4.tcp_congestion_control = bbr
EOF
sysctl -p /etc/sysctl.d/99-bbr.conf
# Verify
sysctl net.ipv4.tcp_congestion_control
NIC Tuning
# Check NIC settings
ethtool eth0
# Check ring buffer size
ethtool -g eth0
# Increase ring buffer
ethtool -G eth0 rx 4096 tx 4096
# Check offloading features
ethtool -k eth0
# Enable offloading
ethtool -K eth0 tso on gso on gro on
# Check interrupt coalescing
ethtool -c eth0
# Tune interrupt coalescing
ethtool -C eth0 rx-usecs 100 tx-usecs 100
# Set MTU (Jumbo frames)
ip link set eth0 mtu 9000
# Make persistent with systemd-networkd
# /etc/systemd/network/10-eth0.link
[Match]
MACAddress=xx:xx:xx:xx:xx:xx
[Link]
MTUBytes=9000
IRQ Balancing
# Check IRQ assignments
cat /proc/interrupts
# Check IRQ affinity
cat /proc/irq/*/smp_affinity
# Manual IRQ affinity (CPU mask)
echo 2 | sudo tee /proc/irq/XX/smp_affinity # CPU 1
# Use irqbalance service
systemctl status irqbalance
# Or disable and manually set
systemctl stop irqbalance
systemctl disable irqbalance
Process and System Limits
ulimit Settings
# View current limits
ulimit -a
# Set limits for session
ulimit -n 65535 # Open files
ulimit -u 65535 # Max user processes
ulimit -l unlimited # Locked memory
# Persistent limits
# /etc/security/limits.conf or /etc/security/limits.d/99-custom.conf
* soft nofile 65535
* hard nofile 65535
* soft nproc 65535
* hard nproc 65535
root soft nofile 65535
root hard nofile 65535
# Systemd service limits
# /etc/systemd/system/myservice.service.d/limits.conf
[Service]
LimitNOFILE=65535
LimitNPROC=65535
LimitMEMLOCK=infinity
Profiling Tools
perf
# Install perf
# Arch: pacman -S perf
# Debian: apt install linux-perf
# RHEL: dnf install perf
# Real-time function analysis
perf top
# Record profile
perf record -g command
perf record -g -p PID -- sleep 30
# Analyze profile
perf report
# CPU statistics
perf stat command
perf stat -d command # Detailed
# Trace syscalls
perf trace command
perf trace -p PID
strace and ltrace
# Trace syscalls
strace command
strace -p PID
# With timing
strace -T command
# Count syscalls
strace -c command
# Trace specific syscalls (include openat: modern glibc uses it instead of open)
strace -e trace=open,openat,read,write command
# Trace library calls
ltrace command
Other Profiling Tools
# CPU flame graphs (with perf)
perf record -g -a -- sleep 30
perf script | stackcollapse-perf.pl | flamegraph.pl > flamegraph.svg
# Memory profiling
valgrind --tool=massif command
ms_print massif.out.*
# I/O profiling
blktrace -d /dev/sda -o trace
blkparse -i trace
# Network latency
ping -c 100 host | tail -1
ss -ti
Benchmarking
CPU Benchmarks
# sysbench CPU
sysbench cpu --threads=8 run
# stress-ng CPU
stress-ng --cpu 8 --cpu-method matrixprod -t 60
# 7-Zip benchmark
7z b
Memory Benchmarks
# sysbench memory
sysbench memory --threads=8 run
# stress-ng memory
stress-ng --vm 2 --vm-bytes 2G -t 60
# memtest (from GRUB or boot media)
Disk Benchmarks
# fio - Flexible I/O tester
# Random read
fio --name=randread --ioengine=libaio --iodepth=16 --rw=randread \
--bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting
# Random write
fio --name=randwrite --ioengine=libaio --iodepth=16 --rw=randwrite \
--bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting
# Sequential read
fio --name=seqread --ioengine=libaio --iodepth=16 --rw=read \
--bs=1M --direct=1 --size=1G --numjobs=1 --runtime=60 --group_reporting
# dd benchmark (simple)
dd if=/dev/zero of=/tmp/test bs=1M count=1024 conv=fdatasync
dd if=/tmp/test of=/dev/null bs=1M
Workload-Specific Tuning
Database Servers
# /etc/sysctl.d/99-database.conf
# Memory
vm.swappiness = 1
vm.dirty_ratio = 5
vm.dirty_background_ratio = 1
vm.overcommit_memory = 2
vm.overcommit_ratio = 80
# Disable THP
# transparent_hugepage=never (kernel param)
# I/O scheduler
# Use deadline for HDDs, none for SSDs
# Filesystem
# noatime; nobarrier only if battery-backed cache (note: nobarrier was removed in modern kernels — XFS 4.19+, ext4 5.x)
Web Servers
# /etc/sysctl.d/99-webserver.conf
# Network
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.ip_local_port_range = 1024 65535
# File descriptors
fs.file-max = 2097152
# Keep-alive
net.ipv4.tcp_keepalive_time = 60
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 10
Container Hosts
# /etc/sysctl.d/99-containers.conf
# Network namespaces
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
# Memory
vm.max_map_count = 262144
# PIDs
kernel.pid_max = 4194304
# File descriptors
fs.file-max = 2097152
fs.inotify.max_user_watches = 524288
fs.inotify.max_user_instances = 512
Quick Reference
# Performance analysis (60-second checklist)
uptime # Load averages
dmesg | tail # Kernel errors
vmstat 1 5 # Overall stats
mpstat -P ALL 1 5 # CPU balance
iostat -xz 1 5 # Disk I/O
free -m # Memory
sar -n DEV 1 5 # Network I/O
# Key sysctl settings
vm.swappiness = 10 # Reduce swapping
vm.dirty_ratio = 15 # Dirty page threshold
net.core.somaxconn = 65535 # Connection backlog
net.ipv4.tcp_tw_reuse = 1 # TIME_WAIT reuse
# CPU governor
cpupower frequency-set -g performance
# I/O scheduler
echo mq-deadline > /sys/block/sda/queue/scheduler # HDD
echo none > /sys/block/nvme0n1/queue/scheduler # NVMe
# Process limits
ulimit -n 65535 # Open files
ulimit -u 65535 # Max processes