Linux Process Management
Process management, signals, and resource control.
Process Inspection
# PS - PROCESS STATUS
ps aux # All processes, user format
ps auxf # With tree (forest)
ps -ef # POSIX format
ps -eo pid,ppid,user,%cpu,%mem,cmd # Custom columns
# USEFUL PS COLUMN COMBOS
ps -eo pid,ppid,user,stat,%cpu,%mem,vsz,rss,tty,start,time,cmd --sort=-%mem | head -20
# stat: process state
# vsz: virtual memory
# rss: resident (actual) memory
# time: cumulative CPU time
# PROCESS STATES:
# R = Running
# S = Sleeping (waiting for event)
# D = Uninterruptible sleep (usually I/O)
# Z = Zombie (terminated, waiting for parent)
# T = Stopped (suspended)
# FIND SPECIFIC PROCESSES
ps aux | grep -v grep | grep nginx # Filter by name
pgrep -a nginx # Better: shows command
pgrep -f "python app.py" # Match full command line
pidof nginx # Just PIDs
# PSTREE - Process hierarchy
pstree # Full tree
pstree -p # With PIDs
pstree -p -a # With arguments
pstree -u user # User's processes
# TOP/HTOP - Interactive monitoring
top # Basic
htop # Better (colors, mouse)
# TOP KEYBOARD SHORTCUTS:
# M = Sort by memory
# P = Sort by CPU
# k = Kill process
# 1 = Show individual CPUs
# c = Show full command
# HTOP ADDITIONS:
# F5 = Tree view
# F6 = Sort by column
# F9 = Kill (signal selection)
# / = Search
# PROCESS INFO FROM /proc
ls /proc/1234/ # Process directory
cat /proc/1234/cmdline | tr '\0' ' ' # Command line
cat /proc/1234/environ | tr '\0' '\n' # Environment
cat /proc/1234/status # Detailed status
cat /proc/1234/limits # Resource limits
ls -l /proc/1234/fd # Open files
ls -l /proc/1234/cwd # Working directory
# MEMORY DETAILS
cat /proc/1234/maps # Memory mappings
cat /proc/1234/smaps | grep -E '^(Size|Rss|Pss):' # Memory stats
# INFRASTRUCTURE: Find memory-hungry processes
ps -eo pid,user,%mem,rss,cmd --sort=-%mem | head -10 | \
awk 'NR>1 {printf "%5s %10s %5s%% %10.1f MB %s\n", $1, $2, $3, $4/1024, $5}'
Process Control
# SIGNALS
# Common signals:
# 1 SIGHUP - Hangup (reload config for many daemons)
# 2 SIGINT - Interrupt (Ctrl+C)
# 9 SIGKILL - Kill immediately (cannot be caught)
# 15 SIGTERM - Terminate gracefully (default)
# 18 SIGCONT - Continue (resume stopped process)
# 19 SIGSTOP - Stop (cannot be caught)
# KILL COMMANDS
kill PID # SIGTERM (graceful)
kill -9 PID # SIGKILL (force)
kill -HUP PID # SIGHUP (reload)
kill -STOP PID # Suspend
kill -CONT PID # Resume
# KILL BY NAME
pkill nginx # SIGTERM to all nginx
pkill -9 -f "python app.py" # SIGKILL by command
killall nginx # Kill all with exact name
# KILL WITH CONFIRMATION
pgrep -a nginx # See what will be killed
pkill nginx # Then kill
# BACKGROUND/FOREGROUND
command & # Run in background
jobs # List background jobs
fg %1 # Bring job 1 to foreground
bg %1 # Continue job 1 in background
Ctrl+Z # Suspend current process
# DISOWN - Survive logout
command & # Start in background
disown %1 # Detach from shell
# Or
nohup command & # Redirect output, ignore hangup
nohup command > /tmp/out.log 2>&1 & # Explicit redirect
# WAIT FOR PROCESSES
wait $PID # Wait for specific PID
wait # Wait for all background jobs
# TIMEOUT
timeout 30 command # Kill after 30 seconds
timeout -s KILL 30 command # Use SIGKILL
timeout --preserve-status 30 command # On timeout, exit with command's status instead of 124
# INFRASTRUCTURE: Kill runaway processes
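# Find processes averaging >50% CPU that have been running for >1 minute
# (etimes = elapsed seconds; ps %cpu is CPU time / elapsed time, i.e. a lifetime average)
ps -eo pid,user,%cpu,etimes,cmd --sort=-%cpu | \
awk 'NR>1 && $3>50 && $4>60 {print "RUNAWAY:", $0}'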
# Kill zombie processes (must kill parent!)
ps aux | awk '$8 ~ /^Z/ {print $2, $11}' # STAT may be Z, Zs, Z+, ...
# Find parent and kill that instead
ps -o ppid= -p ZOMBIE_PID # Parent PID of a zombie
Process Priority
# NICE - CPU priority
# Range: -20 (highest) to 19 (lowest), default 0
nice -n 10 command # Start with low priority
nice -n -5 command # High priority (needs root)
renice -n 10 -p 1234 # Change running process
renice -n 5 -u username # All processes for user
# VIEW NICE VALUES
ps -eo pid,ni,cmd | head -20 # ni = nice value
top # NI column
# IONICE - I/O priority
# Classes: 1=realtime, 2=best-effort, 3=idle
ionice -c 3 command # Idle I/O (won't block others)
ionice -c 2 -n 7 command # Best-effort, low priority
ionice -c 2 -n 0 command # Best-effort, high priority
ionice -p 1234 # Show current for PID
# INFRASTRUCTURE: Low-priority backup
nice -n 19 ionice -c 3 rsync -av /data /backup
# CPULIMIT - Limit CPU percentage
# Install: pacman -S cpulimit
cpulimit -p 1234 -l 50 # Limit PID to 50% CPU
cpulimit -e nginx -l 30 # Limit by name
# PROCESSOR AFFINITY (pin to CPUs)
taskset -c 0,1 command # Run on CPUs 0 and 1
taskset -pc 0,1 1234 # Change running process
taskset -p 1234 # Show current affinity
# CHRT - Real-time priority
# DANGEROUS: Can hang system if misused
chrt -f 1 command # FIFO scheduler, priority 1
chrt -r 1 command # Round-robin scheduler
chrt -p 1234 # Show current
# RESOURCE LIMITS (ulimit)
ulimit -a # Show all limits
ulimit -n # Open files limit
ulimit -n 65536 # Increase (current shell)
# PERMANENT LIMITS: /etc/security/limits.conf
# evanusmodestus soft nofile 65536
# evanusmodestus hard nofile 65536
# * soft nproc 4096
# CGROUPS v2 (systemd)
# View resource usage
systemctl status sshd # Shows cgroup
systemd-cgtop # Top for cgroups
# Set limits via systemd
systemctl set-property nginx.service CPUQuota=50%
systemctl set-property nginx.service MemoryMax=512M
Process Debugging
# STRACE - System call tracer
strace command # Trace new process
strace -p 1234 # Attach to running process
strace -f command # Follow forks
# Useful strace options
strace -e trace=open,openat command # Only file opens (modern libc uses openat)
strace -e network command # Network calls
strace -e trace=file command # File-related calls
strace -e trace=process command # Process calls (fork, exec)
strace -c command # Statistics summary
strace -t command # Timestamps
strace -T command # Time per call
# COMMON STRACE PATTERNS
# What files is it opening?
strace -e openat,open command 2>&1 | grep -v ENOENT
# What network connections?
strace -e connect,accept command
# Why is it slow?
strace -c -p 1234 # Attach and get stats
# Look for high count on slow calls (read, write, poll)
# What config is it reading?
strace -e openat cat /etc/passwd 2>&1 | head -10
# LTRACE - Library call tracer
ltrace command # Trace library calls
ltrace -e malloc command # Specific function
# LSOF - List open files
lsof # All open files (huge!)
lsof -p 1234 # Files for specific PID
lsof -u username # Files for user
lsof -i # Network connections
lsof -i :80 # Who's using port 80
lsof +D /var/log # Files in directory
lsof /path/to/file # Who has file open
# FUSER - Find processes using files
fuser -v /var/log/messages # Who's using this file
fuser -km /mnt/usb # Kill processes using mount (-m = anything on that filesystem)
fuser -v 80/tcp # Who's using port 80
# PROC FILESYSTEM DEBUG
# CPU info
cat /proc/cpuinfo | grep -E '^(processor|model name)'
# Memory info
cat /proc/meminfo | grep -E '^(MemTotal|MemFree|MemAvailable|Cached)'
# Kernel info
cat /proc/version
cat /proc/cmdline # Boot parameters
# Process-specific
cat /proc/1234/stack # Kernel stack (needs root)
cat /proc/1234/syscall # Current syscall
# INFRASTRUCTURE: Debug hanging process
# 1. Find what it's waiting on
cat /proc/1234/wchan # Kernel wait channel
# 2. Check file descriptors
ls -l /proc/1234/fd | head -20 # What's it connected to
# 3. Stack trace
cat /proc/1234/stack # Where in kernel
# 4. Full strace
strace -p 1234 -o /tmp/strace.log &
sleep 10
kill "$!" # Stop the strace started above (%1 may be a different job)
head -100 /tmp/strace.log
Background Jobs & Daemons
# BASIC BACKGROUND
command & # Run in background
command > /tmp/out.log 2>&1 & # With output redirect
# JOB CONTROL
jobs # List jobs
jobs -l # With PIDs
fg # Bring last to foreground
fg %2 # Bring job 2
bg %1 # Continue job 1 in background
# SURVIVE LOGOUT
nohup command & # Classic method
nohup command > /var/log/myapp.log 2>&1 &
disown %1 # Detach from shell
disown -a # Detach all
# SCREEN/TMUX (better for interactive)
screen -S mysession # Start named session
screen -d -r mysession # Reattach
Ctrl+A d # Detach
tmux new -s mysession # Start named session
tmux attach -t mysession # Reattach
Ctrl+B d # Detach
# SETSID - New session
setsid command # Run in new session
# No controlling terminal, won't receive signals from shell
# SYSTEMD USER UNITS (proper way for user daemons)
mkdir -p ~/.config/systemd/user/
cat > ~/.config/systemd/user/myapp.service <<'EOF'
[Unit]
Description=My Application
[Service]
ExecStart=/path/to/myapp
Restart=always
RestartSec=5
[Install]
WantedBy=default.target
EOF
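systemctl --user daemon-reload
systemctl --user enable --now myapp
# User units stop when your last session ends unless lingering is enabled
loginctl enable-linger "$USER"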
# Check status
systemctl --user status myapp
journalctl --user -u myapp -f
# INFRASTRUCTURE: Port forward as service
cat > ~/.config/systemd/user/wazuh-dashboard-pf.service <<'EOF'
[Unit]
Description=Wazuh Dashboard Port Forward
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart=/usr/bin/kubectl -n wazuh port-forward service/wazuh-dashboard 8443:443 --address 0.0.0.0
Restart=always
RestartSec=10
[Install]
WantedBy=default.target
EOF
systemctl --user enable --now wazuh-dashboard-pf
Control Groups (cgroups)
# CGROUPS V2 (unified hierarchy, systemd default)
# View current cgroups
cat /proc/cgroups
mount | grep cgroup
# SYSTEMD CGROUP MANAGEMENT
systemd-cgtop # Top for cgroups
systemd-cgls # Cgroup tree
# VIEW SERVICE CGROUP
systemctl status nginx # Shows CGroup line
cat /sys/fs/cgroup/system.slice/nginx.service/cgroup.controllers
# SET RESOURCE LIMITS VIA SYSTEMD (applied live and saved as drop-ins; add --runtime for until-reboot only)
# CPU limit (50%)
systemctl set-property nginx.service CPUQuota=50%
# Memory limit (512MB hard, 256MB high water)
systemctl set-property nginx.service MemoryMax=512M
systemctl set-property nginx.service MemoryHigh=256M
# I/O weight (1-10000, default 100)
systemctl set-property nginx.service IOWeight=50
# VIEW CURRENT LIMITS
systemctl show nginx.service | grep -E '^(CPU|Memory|IO)'
# OVERRIDE FILE (persistent)
mkdir -p /etc/systemd/system/nginx.service.d/
cat > /etc/systemd/system/nginx.service.d/limits.conf <<'EOF'
[Service]
CPUQuota=50%
MemoryMax=512M
EOF
systemctl daemon-reload
systemctl restart nginx
# MANUAL CGROUP OPERATIONS (cgroups v2)
# Create cgroup
mkdir /sys/fs/cgroup/mygroup
# Set limits
echo "50000 100000" > /sys/fs/cgroup/mygroup/cpu.max # 50% CPU
echo "536870912" > /sys/fs/cgroup/mygroup/memory.max # 512MB
# Add process to cgroup
echo 1234 > /sys/fs/cgroup/mygroup/cgroup.procs
# CGEXEC (cgroup-tools)
# Install: pacman -S libcgroup / apt install cgroup-tools
cgexec -g cpu,memory:/mygroup command
# SLICE HIERARCHY
# systemd organizes:
# -.slice (root)
# ├── system.slice (system services)
# ├── user.slice (user sessions)
# │ └── user-1000.slice (specific user)
# └── machine.slice (VMs/containers)
# VIEW HIERARCHY
systemd-cgls
ls /sys/fs/cgroup/
# INFRASTRUCTURE: Limit Vault memory
mkdir -p /etc/systemd/system/vault.service.d/
cat > /etc/systemd/system/vault.service.d/limits.conf <<'EOF'
[Service]
MemoryMax=2G
MemoryHigh=1536M
CPUQuota=200%
EOF
systemctl daemon-reload
systemctl restart vault
Performance Analysis
# LOAD AVERAGE
uptime # Quick view
cat /proc/loadavg # Raw numbers
# 1.00 = 1 CPU fully utilized
# 4.00 on 4-core = all cores busy
# CPU USAGE
mpstat 1 5 # Average across all CPUs, every 1s, 5 times
mpstat -P ALL 1 # Per-CPU breakdown, every 1s until interrupted
# MEMORY USAGE
free -h # Human readable
vmstat 1 5 # Virtual memory stats
# vmstat columns:
# r = processes waiting for CPU
# b = processes in uninterruptible sleep
# si = swap in (bad if non-zero)
# so = swap out (bad if non-zero)
# I/O STATS
iostat -x 1 5 # Extended I/O stats
iotop # Top for I/O
iotop -o # Only active I/O
# NETWORK
sar -n DEV 1 5 # Network interface stats
ss -s # Socket statistics
# PERF (advanced)
perf top # Top functions
perf record command # Record profile
perf report # View profile
# Process-specific
perf record -p 1234 sleep 30 # Profile PID for 30s
perf report
# FLAMEGRAPHS
perf record -F 99 -g -p 1234 sleep 30
perf script | stackcollapse-perf.pl | flamegraph.pl > flame.svg
# QUICK PERFORMANCE CHECK
echo "=== CPU ===" && mpstat 1 1 | tail -1
echo "=== Memory ===" && free -h | grep Mem
echo "=== I/O ===" && iostat -x 1 1 | tail -2
echo "=== Load ===" && uptime
# INFRASTRUCTURE: Performance across hosts
for host in kvm-01 vault-01 k3s-master-01; do
echo "=== $host ==="
ssh "$host" "uptime" 2>/dev/null | awk -F'load average:' '{print " Load:" $2}'
ssh "$host" "free -h" 2>/dev/null | awk '/Mem:/ {print " Mem:", $3, "/", $2}'
done
# FIND RESOURCE HOGS
echo "=== Top CPU ===" && ps -eo pid,user,%cpu,cmd --sort=-%cpu | head -5
echo "=== Top Memory ===" && ps -eo pid,user,%mem,rss,cmd --sort=-%mem | head -5
echo "=== Top I/O ===" && iotop -b -n 1 | head -5
Process Gotchas
# WRONG: Kill with SIGKILL first
kill -9 1234 # Process can't clean up
# CORRECT: Graceful first, force if needed
kill 1234 # SIGTERM
sleep 2
kill -0 1234 2>/dev/null && kill -9 1234 # Force if still running
# WRONG: Ignoring zombie processes
# Zombies mean parent isn't reaping children
# CORRECT: Find and fix parent
ps -eo pid,ppid,stat | awk '$3 ~ /^Z/ {print "Zombie:", $1, "Parent:", $2}'
# Kill or fix parent process
# WRONG: nohup without redirect
nohup command & # Creates nohup.out in current dir
# CORRECT: Explicit redirect
nohup command > /var/log/myapp.log 2>&1 &
# WRONG: Using kill -9 on D state process
# Uninterruptible sleep (D) cannot receive signals!
kill -9 1234 # Won't work
# CORRECT: Fix underlying I/O problem
# D state usually means NFS hang, disk issue, or kernel bug
lsof -p 1234 # What's it waiting on
cat /proc/1234/stack # Kernel stack
# WRONG: Nice value confusion
nice -n 10 command # LOWER priority
nice -n -10 command # HIGHER priority
# CORRECT: Remember negative = higher priority
# -20 = highest, 19 = lowest
# WRONG: Assuming pkill matches exact name
pkill python # Regex on process name: also kills python3, ipython, ...
# CORRECT: Be specific
pkill -x python # Exact match only
pkill -f "python app.py" # Match full command
# WRONG: Assuming a background job survives logout (it only does sometimes)
command & # May be killed on logout
# CORRECT: Ensure survival
nohup command &
# Or
disown %1
# Or
setsid command
# WRONG: Checking if process exists with ps
if ps aux | grep myprocess; then # grep matches itself!
# CORRECT: Use pgrep or filter grep
if pgrep -x myprocess > /dev/null; then
echo "Running"
fi
Quick Reference
# INSPECTION
ps auxf # Process tree
pgrep -a pattern # Find by name
pstree -p # Hierarchy with PIDs
top/htop # Interactive monitor
# CONTROL
kill PID # SIGTERM (graceful)
kill -9 PID # SIGKILL (force)
pkill -f "pattern" # Kill by command
timeout 30 command # Kill after 30s
# BACKGROUND
command & # Background
nohup command > log 2>&1 & # Survive logout
disown %1 # Detach job
screen -S name # Session manager
# PRIORITY
nice -n 19 command # Lowest priority
ionice -c 3 command # Idle I/O
renice -n 10 -p PID # Change running
# LIMITS
ulimit -a # Show limits
ulimit -n 65536 # Open files
systemctl set-property svc CPUQuota=50%
# DEBUG
strace -p PID # System calls
lsof -p PID # Open files
lsof -i :80 # Port usage
/proc/PID/ # Process info
# PERFORMANCE
uptime # Load average
vmstat 1 # Memory/CPU
iostat -x 1 # I/O stats