awk — Scripting Patterns

Field extractor in a pipeline — top 5 processes by memory
# ps already emits rows in descending %MEM order, so the first five data rows
# (NR 2..6, after the header) are the top five. No extra sort is applied: a
# text-keyed sort of the formatted rows would scramble that order.
ps aux --sort=-%mem |
    awk 'NR>1 && NR<=6 {printf "%-10s %6s %5.1f%%\n", $1, $2, $4}'
Awk output as shell variable — capture highest CPU PID
# Row 2 of the CPU-sorted listing is the first process row (row 1 is the
# header); field 2 of that row is its PID.
top_pid=$(
    ps aux --sort=-%cpu |
        awk 'NR==2 {print $2}'
)
printf 'Highest CPU PID: %s\n' "${top_pid}"
Awk output as multiple variables via read
# -P selects the POSIX output format, which keeps each filesystem on a single
# line; without it a long device name can wrap and row 2 would no longer hold
# the Used ($3) and Available ($4) columns.
read -r used avail <<< "$(df -hP / | awk 'NR==2 {print $3, $4}')"
echo "Root filesystem — used: ${used}, available: ${avail}"
Awk as a conditional in if — check if swap is in use
# /proc/swaps columns: Filename Type Size Used Priority.
# Every device row (NR>1) is examined, not just the first one, and the test is
# on Used ($4) so the result matches the "in use" wording below. awk's exit
# status drives the if: 0 when some device has Used > 0, 1 otherwise (including
# when the file has only its header line).
if awk 'NR>1 && $4+0 > 0 {found=1; exit} END{exit !found}' /proc/swaps 2>/dev/null; then
    echo "Swap is in use"
else
    echo "No swap activity"
fi
Awk as a conditional via exit status — react to high load
# Compare the 1-minute load average against 2.0 and branch on awk's exit code.
# - LC_ALL=C keeps the decimal separator a "." regardless of the user's locale.
# - "averages?" accepts both the "load average:" and "load averages:" spellings,
#   and the split pattern accepts ", " or " " between the three values.
# - The verdict is returned from END so that empty input yields "normal"
#   instead of falling through with status 0.
# - A real if/else is used: with "a && b || c", c would also run if b failed.
if LC_ALL=C uptime | awk -F'load averages?: ' '
        {split($2, avg, /,? /); high = (avg[1]+0 > 2.0)}
        END {exit !high}'; then
    echo "Load is HIGH"
else
    echo "Load is normal"
fi
Generate shell commands with awk — rename .log files by date
# Emit (do not run) one mv command per regular *.log file in /var/log, adding
# the file's modification date as a YYYYMMDD suffix.
# find -printf supplies "date<TAB>path", so the output does not depend on the
# ls date layout (whose default sixth column is a month name, not Y-M-D) and
# paths containing spaces stay intact. Embedded single quotes are rewritten
# as '\'' so the generated command remains valid shell.
find /var/log -maxdepth 1 -type f -name '*.log' -printf '%TY%Tm%Td\t%p\n' 2>/dev/null |
    sort -t $'\t' -k2 |
    awk -F'\t' '{
        quoted = $2
        gsub(/\047/, "\047\\\\\047\047", quoted)
        printf "# %s -> archived\nmv -- \047%s\047 \047%s.%s.bak\047\n", $2, quoted, quoted, $1
    }' | head -20
# Review output, then pipe to bash: | bash
Generate safe systemctl restart commands from failed units
# Emit (do not run) one restart command per failed unit.
# --no-legend removes the column header and the trailing legend/summary lines;
# with only NR>1 as a filter those footer lines (LOAD = ..., ACTIVE = ...,
# "N loaded units listed.") would be turned into restart commands too.
# The unit name is single-quoted (\047) in the generated command because
# escaped unit names contain backslashes (e.g. \x2d).
systemctl --no-pager --plain --no-legend list-units --state=failed 2>/dev/null | awk 'NF>1 {
    printf "echo \"Restarting %s\" && sudo systemctl restart \047%s\047\n", $1, $1
}'
# Review output, then pipe to bash if safe
Process substitution — diff two system snapshots
# Left side: listening TCP ports as reported by ss; the port is the text after
# the last ":" of the Local Address:Port column ($4). -p is not needed because
# the process column is never read.
# Right side: the same set derived from the kernel tables. Rows there are
# whitespace-separated: $2 is HEXADDR:HEXPORT and $4 is the socket state, where
# 0A means LISTEN. The port is decoded digit by digit because awk does not
# portably convert "0x..." strings to numbers.
# sort -nu collapses a port that is bound on several addresses or on both
# IPv4 and IPv6, so the comparison is per port.
diff <(ss -tln | awk 'NR>1 {n=split($4,a,":"); print a[n]}' | sort -nu) \
     <(awk '
        function hex2dec(h,    i, v) {
            v = 0
            for (i = 1; i <= length(h); i++)
                v = v * 16 + index("0123456789ABCDEF", toupper(substr(h, i, 1))) - 1
            return v
        }
        FNR>1 && $4=="0A" {split($2,a,":"); print hex2dec(a[2])}
       ' /proc/net/tcp /proc/net/tcp6 2>/dev/null | sort -nu)
# Compares ss listening ports vs /proc/net/tcp and /proc/net/tcp6 — finds discrepancies
Process substitution — compare package lists between two commands
# Print the sorted package names (first column) of one pacman query flag.
list_pkgs() {
    pacman "$1" | awk '{print $1}' | sort
}
diff <(list_pkgs -Qe) <(list_pkgs -Qd) | head -30
# Lines marked "<" come from the explicit list (-Qe), lines marked ">" from
# the dependency list (-Qd); only the first 30 diff lines are shown.
For loop processing multiple files — extract error counts
# For each readable log, print its path and how many lines contain "error"
# in any letter case.
for f in /var/log/pacman.log /var/log/Xorg.0.log; do
    test -r "$f" &&
        printf "%-30s errors: %d\n" "$f" \
            "$(awk 'BEGIN { hits = 0 } index(tolower($0), "error") { hits++ } END { print hits }' "$f")"
done
For loop with glob — summarize line counts across configs
# Report the line count of every regular /etc/*.conf file and keep the ten
# longest. awk's END block sees the final NR and the current FILENAME.
for conf in /etc/*.conf; do
    [ -f "$conf" ] || continue
    awk 'END { printf "%-40s %d lines\n", FILENAME, NR }' "$conf"
done |
    sort -k2 -rn |
    head -10
Awk program saved as a reusable script — CSV to aligned table
# Write a reusable awk program that renders comma-separated input as a
# left-aligned table, then demonstrate it on a three-row sample.
cat > /tmp/csv2table.awk <<'SCRIPT'
# csv2table.awk: pad every comma-separated column to its widest cell.
# Cells are stored under the portable "row,col" (SUBSEP) key form rather than
# gawk-only arrays of arrays, so the program runs under other awks as well.
BEGIN { FS = "," }
{
    for (i = 1; i <= NF; i++) {
        cell = $i
        gsub(/^[[:space:]]+|[[:space:]]+$/, "", cell)   # trim surrounding blanks
        if (length(cell) > w[i]) w[i] = length(cell)
        data[NR, i] = cell
    }
    # Remember the widest row, not the last one, so ragged input keeps
    # all of its columns.
    if (NF > cols) cols = NF
    rows = NR
}
END {
    for (r = 1; r <= rows; r++) {
        for (c = 1; c <= cols; c++) printf "%-*s  ", w[c] + 0, data[r, c]
        print ""
    }
}
SCRIPT
printf 'name,role,status\nalice,admin,active\nbob,user,locked\n' | awk -f /tmp/csv2table.awk
Pass shell variables into awk safely with -v
threshold=50
target_user=$(whoami)
# Shell values reach awk through -v, never by splicing them into the program.
# "ps aux" limits the USER column to 8 characters, so a longer login name would
# never equal $target_user; an explicit user:32 column keeps the name whole.
# Columns here: $1 user, $2 pid, $3 %mem, $4 first word of the command line.
# The +0 forces a numeric comparison (the header row evaluates to 0).
ps -eo user:32,pid,%mem,args | awk -v thresh="$threshold" -v user="$target_user" \
    '$1==user && $3+0 > thresh+0 {printf "PID %s using %.1f%% mem: %s\n", $2, $3, $4}'
Pass dynamic date into awk with -v — filter today’s journal entries
today=$(date +%b\ %d)
# awk has no /regex/i flag: "/error/i" parses as the match result concatenated
# with an unset variable i, i.e. the non-empty string "0" or "1", which is
# always true. Lower-casing the line gives a real case-insensitive test.
# index(...) == 1 requires the date at the very start of the line (assumes the
# default journalctl output, which leads with "Mon DD") and compares it as
# plain text rather than as a regex that could match anywhere.
journalctl --no-pager -n 500 2>/dev/null |
    awk -v d="$today" 'index($0, d) == 1 && tolower($0) ~ /error/ {print NR": "$0}' |
    tail -10
Awk with -v and a loop — per-user process count
# "ps aux" limits USER to 8 characters, which would report 0 for longer login
# names, so a dedicated wide user column is read instead. The listing is taken
# once and reused for every user rather than re-running ps per iteration.
# NR>1 skips the header row.
proc_owners=$(ps -eo user:32)
for user in root "$(whoami)"; do
    count=$(awk -v u="$user" 'NR>1 && $1==u {n++} END{print n+0}' <<< "$proc_owners")
    printf "%-12s %d processes\n" "$user" "$count"
done
Count files per directory with awk totaling — codex inventory pattern
# One row per codex partial directory (name, number of *.adoc files beneath
# it); the trailing awk stage echoes each row and appends a TOTAL row.
for dir in docs/modules/ROOT/partials/codex/{awk,grep,rg,sed,find,xargs}; do
    label=$(basename "$dir")
    adoc_count=$(find "$dir" -name '*.adoc' | wc -l)
    printf "%-8s %d\n" "$label" "$adoc_count"
done |
    awk '{ print; sum += $2 } END { printf "%-8s %d\n", "TOTAL", sum }'
Generic directory-count aggregator — reusable for any tree
# Count *.adoc files under each immediate subdirectory, list the directories
# largest first, and finish with a TOTAL row.
# - Rows travel as "count<TAB>name" until the last stage, so a directory name
#   containing spaces cannot shift the count column.
# - Sorting happens before the awk stage; sorting afterwards would move the
#   TOTAL row into the sorted body instead of leaving it as the final line.
find docs/modules/ROOT/pages/codex -mindepth 1 -maxdepth 1 -type d | while IFS= read -r d; do
    printf '%d\t%s\n' "$(find "$d" -name '*.adoc' | wc -l)" "$(basename "$d")"
done | sort -t $'\t' -k1,1 -rn | awk -F'\t' '
    {sum += $1; printf "%-20s %d\n", $2, $1}
    END {printf "%-20s %d\n", "TOTAL", sum}'