Performance Tuning
Quick Reference
# System overview
top / htop / atop
vmstat 1
iostat -xz 1
sar -u 1 5
# CPU
mpstat -P ALL 1
perf top
turbostat
# Memory
free -h
vmstat -s
cat /proc/meminfo
# Disk I/O
iotop
iostat -xdz 1
blktrace /dev/sda
# Network
iftop
nethogs
ss -s
Performance Analysis Methodology
USE Method (Utilization, Saturation, Errors)
For each resource, check:
| Metric | Description | Tools |
|---|---|---|
| Utilization | Percentage of time resource is busy | mpstat, iostat, sar, free |
| Saturation | Degree to which work is queued | vmstat (r column), iostat (avgqu-sz), sar -q |
| Errors | Count of error events | dmesg, ethtool -S, /proc/net/dev |
Quick System Assessment
# 60-second analysis checklist
uptime # Load averages
dmesg | tail # Kernel errors
vmstat 1 5 # Overall stats
mpstat -P ALL 1 5 # CPU balance
pidstat 1 5 # Process CPU
iostat -xz 1 5 # Disk I/O
free -m # Memory
sar -n DEV 1 5 # Network I/O
sar -n TCP,ETCP 1 5 # TCP stats
top # Overview
CPU Performance
CPU Monitoring
# Overall CPU usage
top
htop
# Per-CPU statistics
mpstat -P ALL 1
# CPU usage by process
pidstat -u 1
# Real-time function profiling
perf top
# CPU frequency and turbo
turbostat --Summary --quiet
cat /proc/cpuinfo | grep MHz
# Load average
uptime
cat /proc/loadavg
Understanding Load Average
# Load average: 1-min, 5-min, 15-min
# Rule: load average / CPU count
# < 1.0 = OK
# 1.0-2.0 = acceptable
# > 2.0 = investigate
# Check CPU count
nproc
lscpu | grep "^CPU(s):"
# Example: 8 CPUs, load 4.0 = 50% utilized
# Example: 4 CPUs, load 8.0 = 200% (overloaded)
CPU Governor and Frequency
# Current governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
# Available governors
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors
# Set governor (all CPUs)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Using cpupower
cpupower frequency-info
cpupower frequency-set -g performance
# Persistent (systemd)
# /etc/systemd/system/cpupower.service
[Unit]
Description=CPU Power Management
After=multi-user.target
[Service]
Type=oneshot
ExecStart=/usr/bin/cpupower frequency-set -g performance
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target
Process Priority and Affinity
# Nice values (-20 to 19, lower = higher priority)
nice -n -10 command # Start with priority
renice -n -5 -p PID # Change running process
# CPU affinity
taskset -c 0,1 command # Run on CPUs 0,1
taskset -pc 0-3 PID # Set affinity for running process
# View process affinity
taskset -p PID
# Isolate CPUs from scheduler (kernel boot parameter)
# isolcpus=2,3
# Then pin specific processes to those CPUs
CPU Sysctl Tuning
# /etc/sysctl.d/99-cpu.conf
# Scheduler tuning
kernel.sched_migration_cost_ns = 5000000
kernel.sched_autogroup_enabled = 0
# For latency-sensitive workloads
kernel.sched_min_granularity_ns = 10000000
kernel.sched_wakeup_granularity_ns = 15000000
# Apply
sysctl -p /etc/sysctl.d/99-cpu.conf
Memory Performance
Memory Monitoring
# Overview
free -h
cat /proc/meminfo
# Detailed statistics
vmstat -s
# Memory usage by process
ps aux --sort=-%mem | head
top -o %MEM
# Page faults
sar -B 1
# Swap usage
swapon --show
cat /proc/swaps
vmstat 1 | awk '{print $7, $8}' # si, so columns
Understanding Memory Metrics
# Key /proc/meminfo fields
MemTotal # Total physical RAM
MemFree # Completely unused
MemAvailable # Available for allocation (includes reclaimable)
Buffers # Block device buffers
Cached # Page cache
SwapTotal # Total swap
SwapFree # Unused swap
Dirty # Waiting to be written to disk
Writeback # Being written to disk
Mapped # Memory mapped files
Slab # Kernel data structures
SReclaimable # Reclaimable slab memory
PageTables # Page table memory
Swap Configuration
# Check swappiness (0-100, higher = more aggressive)
cat /proc/sys/vm/swappiness
# Set swappiness
# For servers with plenty of RAM
echo 10 | sudo tee /proc/sys/vm/swappiness
# Persistent
echo "vm.swappiness = 10" | sudo tee -a /etc/sysctl.d/99-memory.conf
# Disable swap (not recommended for most cases)
swapoff -a
# Create swap file
dd if=/dev/zero of=/swapfile bs=1M count=4096
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile
# Persistent swap in /etc/fstab
/swapfile none swap sw 0 0
Memory Sysctl Tuning
# /etc/sysctl.d/99-memory.conf
# Swappiness (0-100)
vm.swappiness = 10
# VFS cache pressure (default 100)
vm.vfs_cache_pressure = 50
# Dirty page settings
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
vm.dirty_expire_centisecs = 3000
vm.dirty_writeback_centisecs = 500
# Overcommit settings
vm.overcommit_memory = 0
vm.overcommit_ratio = 50
# For large memory systems
vm.min_free_kbytes = 262144
# Zone reclaim mode (NUMA)
vm.zone_reclaim_mode = 0
# Apply
sysctl -p /etc/sysctl.d/99-memory.conf
Transparent Huge Pages
# Check THP status
cat /sys/kernel/mm/transparent_hugepage/enabled
# Disable THP (recommended for databases)
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/defrag
# Persistent (add to kernel parameters or use systemd)
# Kernel parameter: transparent_hugepage=never
# Or systemd service
cat > /etc/systemd/system/disable-thp.service << 'EOF'
[Unit]
Description=Disable Transparent Huge Pages
[Service]
Type=oneshot
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/enabled"
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
RemainAfterExit=yes
[Install]
WantedBy=multi-user.target
EOF
systemctl enable disable-thp.service
NUMA Optimization
# Check NUMA topology
numactl --hardware
lscpu | grep NUMA
# View NUMA statistics
numastat
numastat -p PID
# Run process on specific NUMA node
numactl --cpunodebind=0 --membind=0 command
# Interleave memory across nodes
numactl --interleave=all command
# Check NUMA balancing
cat /proc/sys/kernel/numa_balancing
# Disable NUMA balancing (for some workloads)
echo 0 | sudo tee /proc/sys/kernel/numa_balancing
Disk I/O Performance
I/O Monitoring
# Per-device I/O statistics
iostat -xz 1
# Key metrics:
# r/s, w/s - reads/writes per second
# rkB/s, wkB/s - throughput
# await - average I/O wait time (ms)
# avgqu-sz - average queue length
# %util - device utilization
# I/O by process
iotop
pidstat -d 1
# Block I/O tracing
blktrace -d /dev/sda -o - | blkparse -i -
# Check for I/O wait
vmstat 1 | awk '{print $16}' # wa column
I/O Scheduler
# Check current scheduler
cat /sys/block/sda/queue/scheduler
# Available schedulers (kernel dependent)
# mq-deadline - Good for HDDs
# none/noop - Good for NVMe/SSDs (no scheduling needed)
# bfq - Good for desktop/interactive
# kyber - Good for fast SSDs
# Change scheduler
echo mq-deadline | sudo tee /sys/block/sda/queue/scheduler
# Persistent via udev rule
# /etc/udev/rules.d/60-scheduler.rules
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/scheduler}="mq-deadline"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", ATTR{queue/scheduler}="none"
ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/scheduler}="none"
Block Device Tuning
# Read-ahead (read_ahead_kb is in KiB)
cat /sys/block/sda/queue/read_ahead_kb
echo 256 | sudo tee /sys/block/sda/queue/read_ahead_kb
# Queue depth
cat /sys/block/sda/device/queue_depth
echo 64 | sudo tee /sys/block/sda/device/queue_depth
# Number of requests in queue
cat /sys/block/sda/queue/nr_requests
echo 256 | sudo tee /sys/block/sda/queue/nr_requests
# Disable NCQ (if causing issues)
echo 1 | sudo tee /sys/block/sda/device/queue_depth
Filesystem Tuning
# Mount options for ext4
# /etc/fstab
/dev/sda1 / ext4 defaults,noatime,nodiratime,discard 0 1
# noatime - Don't update access times
# nodiratime - Don't update directory access times
# discard - Enable TRIM for SSDs (or use fstrim.timer)
# Mount options for XFS
/dev/sda1 / xfs defaults,noatime,logbufs=8,logbsize=256k 0 1
# Enable periodic TRIM
systemctl enable fstrim.timer
# Manual TRIM
fstrim -av
# Check filesystem fragmentation (ext4)
e4defrag -c /path
# Defragment (ext4)
e4defrag /path
Network Performance
Network Monitoring
# Interface statistics
ip -s link show eth0
cat /proc/net/dev
# Real-time bandwidth
iftop
nethogs
nload
# Connection statistics
ss -s
# TCP statistics
netstat -s | grep -i tcp
cat /proc/net/snmp | grep Tcp
# Errors and drops
ethtool -S eth0 | grep -E "error|drop"
Network Sysctl Tuning
# /etc/sysctl.d/99-network.conf
# TCP buffer sizes (min, default, max)
net.core.rmem_default = 262144
net.core.wmem_default = 262144
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 262144 16777216
net.ipv4.tcp_wmem = 4096 262144 16777216
# Connection tracking
net.netfilter.nf_conntrack_max = 1048576
# TCP tuning
net.ipv4.tcp_fastopen = 3
net.ipv4.tcp_slow_start_after_idle = 0
net.ipv4.tcp_no_metrics_save = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.tcp_keepalive_time = 300
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_intvl = 15
# Connection queue
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 65535
net.ipv4.tcp_max_syn_backlog = 65535
# Port range
net.ipv4.ip_local_port_range = 1024 65535
# TIME_WAIT reuse
net.ipv4.tcp_tw_reuse = 1
# Congestion control
net.ipv4.tcp_congestion_control = bbr
net.core.default_qdisc = fq
# Apply
sysctl -p /etc/sysctl.d/99-network.conf
Enable BBR Congestion Control
# Check if BBR is available
modprobe tcp_bbr
lsmod | grep bbr
# Enable BBR
echo "tcp_bbr" | sudo tee -a /etc/modules-load.d/bbr.conf
cat >> /etc/sysctl.d/99-bbr.conf << 'EOF'
net.core.default_qdisc = fq
net.ipv4.tcp_congestion_control = bbr
EOF
sysctl -p /etc/sysctl.d/99-bbr.conf
# Verify
sysctl net.ipv4.tcp_congestion_control
NIC Tuning
# Check NIC settings
ethtool eth0
# Check ring buffer size
ethtool -g eth0
# Increase ring buffer
ethtool -G eth0 rx 4096 tx 4096
# Check offloading features
ethtool -k eth0
# Enable offloading
ethtool -K eth0 tso on gso on gro on
# Check interrupt coalescing
ethtool -c eth0
# Tune interrupt coalescing
ethtool -C eth0 rx-usecs 100 tx-usecs 100
# Set MTU (Jumbo frames)
ip link set eth0 mtu 9000
# Make persistent with systemd-networkd
# /etc/systemd/network/10-eth0.link
[Match]
MACAddress=xx:xx:xx:xx:xx:xx
[Link]
MTUBytes=9000
IRQ Balancing
# Check IRQ assignments
cat /proc/interrupts
# Check IRQ affinity
cat /proc/irq/*/smp_affinity
# Manual IRQ affinity (CPU mask)
echo 2 | sudo tee /proc/irq/XX/smp_affinity # CPU 1
# Use irqbalance service
systemctl status irqbalance
# Or disable and manually set
systemctl stop irqbalance
systemctl disable irqbalance
Process and System Limits
ulimit Settings
# View current limits
ulimit -a
# Set limits for session
ulimit -n 65535 # Open files
ulimit -u 65535 # Max user processes
ulimit -l unlimited # Locked memory
# Persistent limits
# /etc/security/limits.conf or /etc/security/limits.d/99-custom.conf
* soft nofile 65535
* hard nofile 65535
* soft nproc 65535
* hard nproc 65535
root soft nofile 65535
root hard nofile 65535
# Systemd service limits
# /etc/systemd/system/myservice.service.d/limits.conf
[Service]
LimitNOFILE=65535
LimitNPROC=65535
LimitMEMLOCK=infinity
Profiling Tools
perf
# Install perf
# Arch: pacman -S perf
# Debian: apt install linux-perf
# RHEL: dnf install perf
# Real-time function analysis
perf top
# Record profile
perf record -g command
perf record -g -p PID -- sleep 30
# Analyze profile
perf report
# CPU statistics
perf stat command
perf stat -d command # Detailed
# Trace syscalls
perf trace command
perf trace -p PID
strace and ltrace
# Trace syscalls
strace command
strace -p PID
# With timing
strace -T command
# Count syscalls
strace -c command
# Trace specific syscalls (include openat: modern glibc uses it instead of open)
strace -e trace=open,openat,read,write command
# Trace library calls
ltrace command
Other Profiling Tools
# CPU flame graphs (with perf)
perf record -g -a -- sleep 30
perf script | stackcollapse-perf.pl | flamegraph.pl > flamegraph.svg
# Memory profiling
valgrind --tool=massif command
ms_print massif.out.*
# I/O profiling
blktrace -d /dev/sda -o trace
blkparse -i trace
# Network latency
ping -c 100 host | tail -1
ss -ti
Benchmarking
CPU Benchmarks
# sysbench CPU
sysbench cpu --threads=8 run
# stress-ng CPU
stress-ng --cpu 8 --cpu-method matrixprod -t 60
# 7-Zip benchmark
7z b
Memory Benchmarks
# sysbench memory
sysbench memory --threads=8 run
# stress-ng memory
stress-ng --vm 2 --vm-bytes 2G -t 60
# memtest (from GRUB or boot media)
Disk Benchmarks
# fio - Flexible I/O tester
# Random read
fio --name=randread --ioengine=libaio --iodepth=16 --rw=randread \
--bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting
# Random write
fio --name=randwrite --ioengine=libaio --iodepth=16 --rw=randwrite \
--bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting
# Sequential read
fio --name=seqread --ioengine=libaio --iodepth=16 --rw=read \
--bs=1M --direct=1 --size=1G --numjobs=1 --runtime=60 --group_reporting
# dd benchmark (simple)
dd if=/dev/zero of=/tmp/test bs=1M count=1024 conv=fdatasync
dd if=/tmp/test of=/dev/null bs=1M
Workload-Specific Tuning
Database Servers
# /etc/sysctl.d/99-database.conf
# Memory
vm.swappiness = 1
vm.dirty_ratio = 5
vm.dirty_background_ratio = 1
vm.overcommit_memory = 2
vm.overcommit_ratio = 80
# Disable THP
# transparent_hugepage=never (kernel param)
# I/O scheduler
# Use deadline for HDDs, none for SSDs
# Filesystem
# noatime; nobarrier only if battery-backed cache (note: nobarrier was removed in modern kernels — XFS 4.19+, ext4 5.x)
Web Servers
# /etc/sysctl.d/99-webserver.conf
# Network
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.ip_local_port_range = 1024 65535
# File descriptors
fs.file-max = 2097152
# Keep-alive
net.ipv4.tcp_keepalive_time = 60
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 10
Container Hosts
# /etc/sysctl.d/99-containers.conf
# Network namespaces
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
# Memory
vm.max_map_count = 262144
# PIDs
kernel.pid_max = 4194304
# File descriptors
fs.file-max = 2097152
fs.inotify.max_user_watches = 524288
fs.inotify.max_user_instances = 512
Quick Reference
# Performance analysis (60-second checklist)
uptime # Load averages
dmesg | tail # Kernel errors
vmstat 1 5 # Overall stats
mpstat -P ALL 1 5 # CPU balance
iostat -xz 1 5 # Disk I/O
free -m # Memory
sar -n DEV 1 5 # Network I/O
# Key sysctl settings
vm.swappiness = 10 # Reduce swapping
vm.dirty_ratio = 15 # Dirty page threshold
net.core.somaxconn = 65535 # Connection backlog
net.ipv4.tcp_tw_reuse = 1 # TIME_WAIT reuse
# CPU governor
cpupower frequency-set -g performance
# I/O scheduler
echo mq-deadline > /sys/block/sda/queue/scheduler # HDD
echo none > /sys/block/nvme0n1/queue/scheduler # NVMe
# Process limits
ulimit -n 65535 # Open files
ulimit -u 65535 # Max processes