Performance Tuning

Quick Reference

# System overview
top / htop / atop
vmstat 1
iostat -xz 1
sar -u 1 5

# CPU
mpstat -P ALL 1
perf top
turbostat

# Memory
free -h
vmstat -s
cat /proc/meminfo

# Disk I/O
iotop
iostat -xdz 1
blktrace -d /dev/sda

# Network
iftop
nethogs
ss -s

Performance Analysis Methodology

USE Method (Utilization, Saturation, Errors)

For each resource, check:

Metric Description Tools

Utilization

Percentage of time resource is busy

top, iostat, sar

Saturation

Degree to which work is queued

vmstat (run queue), iostat (avgqu-sz)

Errors

Count of error events

dmesg, journalctl, /proc/interrupts

Quick System Assessment

# 60-second analysis checklist
uptime                       # Load averages
dmesg | tail                 # Kernel errors
vmstat 1 5                   # Overall stats
mpstat -P ALL 1 5            # CPU balance
pidstat 1 5                  # Process CPU
iostat -xz 1 5               # Disk I/O
free -m                      # Memory
sar -n DEV 1 5               # Network I/O
sar -n TCP,ETCP 1 5          # TCP stats
top                          # Overview

CPU Performance

CPU Monitoring

# Overall CPU usage
top
htop

# Per-CPU statistics
mpstat -P ALL 1

# CPU usage by process
pidstat -u 1

# Real-time function profiling
perf top

# CPU frequency and turbo
turbostat --Summary --quiet
cat /proc/cpuinfo | grep MHz

# Load average
uptime
cat /proc/loadavg

Understanding Load Average

# Load average: 1-min, 5-min, 15-min
# Rule: load average / CPU count
#   < 1.0 = OK
#   1.0-2.0 = acceptable
#   > 2.0 = investigate

# Check CPU count
nproc
lscpu | grep "^CPU(s):"

# Example: 8 CPUs, load 4.0 = 50% utilized
# Example: 4 CPUs, load 8.0 = 200% (overloaded)

CPU Governor and Frequency

# Current governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor

# Available governors
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors

# Set governor (all CPUs)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# Using cpupower
cpupower frequency-info
cpupower frequency-set -g performance

# Persistent (systemd)
# /etc/systemd/system/cpupower.service
[Unit]
Description=CPU Power Management
After=multi-user.target

[Service]
Type=oneshot
ExecStart=/usr/bin/cpupower frequency-set -g performance
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target

Process Priority and Affinity

# Nice values (-20 to 19, lower = higher priority)
nice -n -10 command          # Start with priority
renice -n -5 -p PID          # Change running process

# CPU affinity
taskset -c 0,1 command       # Run on CPUs 0,1
taskset -pc 0-3 PID          # Set affinity for running process

# View process affinity
taskset -p PID

# Isolate CPUs from scheduler (kernel boot parameter)
# isolcpus=2,3
# Then pin specific processes to those CPUs

CPU Sysctl Tuning

# /etc/sysctl.d/99-cpu.conf

# Scheduler tuning (kernels < 5.13 only; newer kernels moved these
# knobs out of /proc/sys into /sys/kernel/debug/sched/)
kernel.sched_migration_cost_ns = 5000000
kernel.sched_autogroup_enabled = 0

# For latency-sensitive workloads
kernel.sched_min_granularity_ns = 10000000
kernel.sched_wakeup_granularity_ns = 15000000

# Apply
sysctl -p /etc/sysctl.d/99-cpu.conf

Memory Performance

Memory Monitoring

# Overview
free -h
cat /proc/meminfo

# Detailed statistics
vmstat -s

# Memory usage by process
ps aux --sort=-%mem | head
top -o %MEM

# Page faults
sar -B 1

# Swap usage
swapon --show
cat /proc/swaps
vmstat 1 | awk 'NR > 2 {print $7, $8}'  # si, so columns (skip header lines)

Understanding Memory Metrics

# Key /proc/meminfo fields
MemTotal       # Total physical RAM
MemFree        # Completely unused
MemAvailable   # Available for allocation (includes reclaimable)
Buffers        # Block device buffers
Cached         # Page cache
SwapTotal      # Total swap
SwapFree       # Unused swap
Dirty          # Waiting to be written to disk
Writeback      # Being written to disk
Mapped         # Memory mapped files
Slab           # Kernel data structures
SReclaimable   # Reclaimable slab memory
PageTables     # Page table memory

Swap Configuration

# Check swappiness (0-100, higher = more aggressive)
cat /proc/sys/vm/swappiness

# Set swappiness
# For servers with plenty of RAM
echo 10 | sudo tee /proc/sys/vm/swappiness

# Persistent
echo "vm.swappiness = 10" | sudo tee -a /etc/sysctl.d/99-memory.conf

# Disable swap (not recommended for most cases)
swapoff -a

# Create swap file
dd if=/dev/zero of=/swapfile bs=1M count=4096
chmod 600 /swapfile
mkswap /swapfile
swapon /swapfile

# Persistent swap in /etc/fstab
/swapfile none swap sw 0 0

Memory Sysctl Tuning

# /etc/sysctl.d/99-memory.conf

# Swappiness (0-100)
vm.swappiness = 10

# VFS cache pressure (default 100)
vm.vfs_cache_pressure = 50

# Dirty page settings
vm.dirty_ratio = 15
vm.dirty_background_ratio = 5
vm.dirty_expire_centisecs = 3000
vm.dirty_writeback_centisecs = 500

# Overcommit settings
vm.overcommit_memory = 0
vm.overcommit_ratio = 50

# For large memory systems
vm.min_free_kbytes = 262144

# Zone reclaim mode (NUMA)
vm.zone_reclaim_mode = 0

# Apply
sysctl -p /etc/sysctl.d/99-memory.conf

Transparent Huge Pages

# Check THP status
cat /sys/kernel/mm/transparent_hugepage/enabled

# Disable THP (recommended for databases)
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo never | sudo tee /sys/kernel/mm/transparent_hugepage/defrag

# Persistent (add to kernel parameters or use systemd)
# Kernel parameter: transparent_hugepage=never

# Or systemd service
cat > /etc/systemd/system/disable-thp.service << 'EOF'
[Unit]
Description=Disable Transparent Huge Pages

[Service]
Type=oneshot
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/enabled"
ExecStart=/bin/sh -c "echo never > /sys/kernel/mm/transparent_hugepage/defrag"
RemainAfterExit=yes

[Install]
WantedBy=multi-user.target
EOF

systemctl enable disable-thp.service

NUMA Optimization

# Check NUMA topology
numactl --hardware
lscpu | grep NUMA

# View NUMA statistics
numastat
numastat -p PID

# Run process on specific NUMA node
numactl --cpunodebind=0 --membind=0 command

# Interleave memory across nodes
numactl --interleave=all command

# Check NUMA balancing
cat /proc/sys/kernel/numa_balancing

# Disable NUMA balancing (for some workloads)
echo 0 | sudo tee /proc/sys/kernel/numa_balancing

Disk I/O Performance

I/O Monitoring

# Per-device I/O statistics
iostat -xz 1

# Key metrics:
# r/s, w/s      - reads/writes per second
# rkB/s, wkB/s  - throughput
# await         - average I/O wait time (ms)
# avgqu-sz      - average queue length
# %util         - device utilization

# I/O by process
iotop
pidstat -d 1

# Block I/O tracing
blktrace -d /dev/sda -o - | blkparse -i -

# Check for I/O wait
vmstat 1 | awk 'NR > 2 {print $16}'  # wa column (skip header lines)

I/O Scheduler

# Check current scheduler
cat /sys/block/sda/queue/scheduler

# Available schedulers (kernel dependent)
# mq-deadline - Good for HDDs
# none/noop   - Good for NVMe/SSDs (no scheduling needed)
# bfq         - Good for desktop/interactive
# kyber       - Good for fast SSDs

# Change scheduler
echo mq-deadline | sudo tee /sys/block/sda/queue/scheduler

# Persistent via udev rule
# /etc/udev/rules.d/60-scheduler.rules
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/scheduler}="mq-deadline"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", ATTR{queue/scheduler}="none"
ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/scheduler}="none"

Block Device Tuning

# Read-ahead (read_ahead_kb is in kilobytes; use 'blockdev --setra/--getra'
# if you need the 512-byte-sector interface)
cat /sys/block/sda/queue/read_ahead_kb
echo 256 | sudo tee /sys/block/sda/queue/read_ahead_kb

# Queue depth
cat /sys/block/sda/device/queue_depth
echo 64 | sudo tee /sys/block/sda/device/queue_depth

# Number of requests in queue
cat /sys/block/sda/queue/nr_requests
echo 256 | sudo tee /sys/block/sda/queue/nr_requests

# Disable NCQ (if causing issues)
echo 1 | sudo tee /sys/block/sda/device/queue_depth

Filesystem Tuning

# Mount options for ext4
# /etc/fstab
/dev/sda1 / ext4 defaults,noatime,nodiratime,discard 0 1

# noatime     - Don't update access times
# nodiratime  - Don't update directory access times
# discard     - Enable TRIM for SSDs (or use fstrim.timer)

# Mount options for XFS
/dev/sda1 / xfs defaults,noatime,logbufs=8,logbsize=256k 0 1

# Enable periodic TRIM
systemctl enable fstrim.timer

# Manual TRIM
fstrim -av

# Check filesystem fragmentation (ext4)
e4defrag -c /path

# Defragment (ext4)
e4defrag /path

I/O Sysctl Tuning

# /etc/sysctl.d/99-io.conf

# Increase max AIO requests
fs.aio-max-nr = 1048576

# File handle limits
fs.file-max = 2097152
fs.nr_open = 2097152

# Inotify limits
fs.inotify.max_user_watches = 524288
fs.inotify.max_user_instances = 512

# Apply
sysctl -p /etc/sysctl.d/99-io.conf

Network Performance

Network Monitoring

# Interface statistics
ip -s link show eth0
cat /proc/net/dev

# Real-time bandwidth
iftop
nethogs
nload

# Connection statistics
ss -s

# TCP statistics
netstat -s | grep -i tcp
cat /proc/net/snmp | grep Tcp

# Errors and drops
ethtool -S eth0 | grep -E "error|drop"

Network Sysctl Tuning

# /etc/sysctl.d/99-network.conf

# TCP buffer sizes (min, default, max)
net.core.rmem_default = 262144
net.core.wmem_default = 262144
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_rmem = 4096 262144 16777216
net.ipv4.tcp_wmem = 4096 262144 16777216

# Connection tracking
net.netfilter.nf_conntrack_max = 1048576

# TCP tuning
net.ipv4.tcp_fastopen = 3
net.ipv4.tcp_slow_start_after_idle = 0
net.ipv4.tcp_no_metrics_save = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.tcp_keepalive_time = 300
net.ipv4.tcp_keepalive_probes = 5
net.ipv4.tcp_keepalive_intvl = 15

# Connection queue
net.core.somaxconn = 65535
net.core.netdev_max_backlog = 65535
net.ipv4.tcp_max_syn_backlog = 65535

# Port range
net.ipv4.ip_local_port_range = 1024 65535

# TIME_WAIT reuse
net.ipv4.tcp_tw_reuse = 1

# Congestion control
net.ipv4.tcp_congestion_control = bbr
net.core.default_qdisc = fq

# Apply
sysctl -p /etc/sysctl.d/99-network.conf

Enable BBR Congestion Control

# Check if BBR is available
modprobe tcp_bbr
lsmod | grep bbr

# Enable BBR
echo "tcp_bbr" | sudo tee -a /etc/modules-load.d/bbr.conf

cat >> /etc/sysctl.d/99-bbr.conf << 'EOF'
net.core.default_qdisc = fq
net.ipv4.tcp_congestion_control = bbr
EOF

sysctl -p /etc/sysctl.d/99-bbr.conf

# Verify
sysctl net.ipv4.tcp_congestion_control

NIC Tuning

# Check NIC settings
ethtool eth0

# Check ring buffer size
ethtool -g eth0

# Increase ring buffer
ethtool -G eth0 rx 4096 tx 4096

# Check offloading features
ethtool -k eth0

# Enable offloading
ethtool -K eth0 tso on gso on gro on

# Check interrupt coalescing
ethtool -c eth0

# Tune interrupt coalescing
ethtool -C eth0 rx-usecs 100 tx-usecs 100

# Set MTU (Jumbo frames)
ip link set eth0 mtu 9000

# Make persistent with systemd-networkd
# /etc/systemd/network/10-eth0.link
[Match]
MACAddress=xx:xx:xx:xx:xx:xx

[Link]
MTUBytes=9000

IRQ Balancing

# Check IRQ assignments
cat /proc/interrupts

# Check IRQ affinity
cat /proc/irq/*/smp_affinity

# Manual IRQ affinity (CPU mask)
echo 2 | sudo tee /proc/irq/XX/smp_affinity  # CPU 1

# Use irqbalance service
systemctl status irqbalance

# Or disable and manually set
systemctl stop irqbalance
systemctl disable irqbalance

Process and System Limits

ulimit Settings

# View current limits
ulimit -a

# Set limits for session
ulimit -n 65535      # Open files
ulimit -u 65535      # Max user processes
ulimit -l unlimited  # Locked memory

# Persistent limits
# /etc/security/limits.conf or /etc/security/limits.d/99-custom.conf
*               soft    nofile          65535
*               hard    nofile          65535
*               soft    nproc           65535
*               hard    nproc           65535
root            soft    nofile          65535
root            hard    nofile          65535

# Systemd service limits
# /etc/systemd/system/myservice.service.d/limits.conf
[Service]
LimitNOFILE=65535
LimitNPROC=65535
LimitMEMLOCK=infinity

System-wide Limits

# /etc/sysctl.d/99-limits.conf

# Max open files system-wide
fs.file-max = 2097152

# Max PID
kernel.pid_max = 4194304

# Max threads
kernel.threads-max = 4194304

# Apply
sysctl -p /etc/sysctl.d/99-limits.conf

Profiling Tools

perf

# Install perf
# Arch: pacman -S perf
# Debian: apt install linux-perf
# RHEL: dnf install perf

# Real-time function analysis
perf top

# Record profile
perf record -g command
perf record -g -p PID -- sleep 30

# Analyze profile
perf report

# CPU statistics
perf stat command
perf stat -d command    # Detailed

# Trace syscalls
perf trace command
perf trace -p PID

strace and ltrace

# Trace syscalls
strace command
strace -p PID

# With timing
strace -T command

# Count syscalls
strace -c command

# Trace specific syscalls
strace -e trace=openat,open,read,write command   # include openat: modern libc opens files via openat, not open

# Trace library calls
ltrace command

Other Profiling Tools

# CPU flame graphs (with perf)
perf record -g -a -- sleep 30
perf script | stackcollapse-perf.pl | flamegraph.pl > flamegraph.svg

# Memory profiling
valgrind --tool=massif command
ms_print massif.out.*

# I/O profiling
blktrace -d /dev/sda -o trace
blkparse -i trace

# Network latency
ping -c 100 host | tail -1
ss -ti

Benchmarking

CPU Benchmarks

# sysbench CPU
sysbench cpu --threads=8 run

# stress-ng CPU
stress-ng --cpu 8 --cpu-method matrixprod -t 60

# 7-Zip benchmark
7z b

Memory Benchmarks

# sysbench memory
sysbench memory --threads=8 run

# stress-ng memory
stress-ng --vm 2 --vm-bytes 2G -t 60

# memtest (from GRUB or boot media)

Disk Benchmarks

# fio - Flexible I/O tester
# Random read
fio --name=randread --ioengine=libaio --iodepth=16 --rw=randread \
    --bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting

# Random write
fio --name=randwrite --ioengine=libaio --iodepth=16 --rw=randwrite \
    --bs=4k --direct=1 --size=1G --numjobs=4 --runtime=60 --group_reporting

# Sequential read
fio --name=seqread --ioengine=libaio --iodepth=16 --rw=read \
    --bs=1M --direct=1 --size=1G --numjobs=1 --runtime=60 --group_reporting

# dd benchmark (simple)
dd if=/dev/zero of=/tmp/test bs=1M count=1024 conv=fdatasync
dd if=/tmp/test of=/dev/null bs=1M

Network Benchmarks

# iperf3 (server and client)
iperf3 -s                    # Server
iperf3 -c server_ip          # Client
iperf3 -c server_ip -R       # Reverse (download)
iperf3 -c server_ip -P 4     # Parallel streams

# netperf
netserver                    # Server
netperf -H server_ip         # Client

Workload-Specific Tuning

Database Servers

# /etc/sysctl.d/99-database.conf

# Memory
vm.swappiness = 1
vm.dirty_ratio = 5
vm.dirty_background_ratio = 1
vm.overcommit_memory = 2
vm.overcommit_ratio = 80

# Disable THP
# transparent_hugepage=never (kernel param)

# I/O scheduler
# Use deadline for HDDs, none for SSDs

# Filesystem
# noatime; nobarrier only on older kernels with a battery-backed write cache
# (the barrier/nobarrier options were removed from modern ext4 and XFS)

Web Servers

# /etc/sysctl.d/99-webserver.conf

# Network
net.core.somaxconn = 65535
net.ipv4.tcp_max_syn_backlog = 65535
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_fin_timeout = 15
net.ipv4.ip_local_port_range = 1024 65535

# File descriptors
fs.file-max = 2097152

# Keep-alive
net.ipv4.tcp_keepalive_time = 60
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_intvl = 10

Container Hosts

# /etc/sysctl.d/99-containers.conf

# Network namespaces
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1

# Memory
vm.max_map_count = 262144

# PIDs
kernel.pid_max = 4194304

# File descriptors
fs.file-max = 2097152
fs.inotify.max_user_watches = 524288
fs.inotify.max_user_instances = 512

Quick Reference

# Performance analysis (60-second checklist)
uptime                       # Load averages
dmesg | tail                 # Kernel errors
vmstat 1 5                   # Overall stats
mpstat -P ALL 1 5            # CPU balance
iostat -xz 1 5               # Disk I/O
free -m                      # Memory
sar -n DEV 1 5               # Network I/O

# Key sysctl settings
vm.swappiness = 10           # Reduce swapping
vm.dirty_ratio = 15          # Dirty page threshold
net.core.somaxconn = 65535   # Connection backlog
net.ipv4.tcp_tw_reuse = 1    # TIME_WAIT reuse

# CPU governor
cpupower frequency-set -g performance

# I/O scheduler
echo mq-deadline > /sys/block/sda/queue/scheduler  # HDD
echo none > /sys/block/nvme0n1/queue/scheduler     # NVMe

# Process limits
ulimit -n 65535              # Open files
ulimit -u 65535              # Max processes