# AWK
| Attribute | Value |
|---|---|
| Goal | Expert AWK for data processing |
| Interest Link | Systems Tools > Text Processing |
| Status | In Progress |
| Documentation | Codex awk section, domus-linux-ops awk-mastery |
## Skill Areas
| Area | Description | Status |
|---|---|---|
| Field Processing | $1, $2, $NF, FS, OFS | [x] Proficient |
| Patterns | /regex/, ranges, BEGIN/END | [x] Proficient |
| Variables | Built-in (NR, NF, FILENAME) | [x] Proficient |
| Arrays | Associative arrays, loops | [ ] In Progress |
| Functions | User-defined, string, math | [ ] In Progress |
| Multi-file | FNR, FILENAME patterns | [ ] In Progress |
## Field Extraction
# --- Extracting fields with awk ---
# Default: whitespace delimiter
awk '{print $1}' file # First field
awk '{print $1, $3}' file # Fields 1 and 3
awk '{print $NF}' file # Last field
awk '{print $(NF-1)}' file # Second to last
# Custom delimiter
awk -F':' '{print $1}' /etc/passwd # Colon separated
awk -F',' '{print $2}' data.csv # CSV (naive: does not handle quoted commas)
awk -F'\t' '{print $1}' data.tsv # Tab separated
awk -F'[,;:]' '{print $1}' file # Multiple delimiters (FS is a regex here)
# Output field separator
awk -F':' -v OFS=',' '{print $1, $3, $6}' /etc/passwd
# Field range (print fields 2 through 5)
awk '{for(i=2; i<=5; i++) printf "%s ", $i; print ""}' file
# All fields except first
# (clearing $1 rebuilds $0 joined with OFS; substr drops the leading separator)
awk '{$1=""; print substr($0,2)}' file
# Reformat output
awk -F':' '{printf "%-20s %s\n", $1, $6}' /etc/passwd
# Infrastructure: Parse ISE session fields
# (netapi is a site-local CLI -- TODO confirm availability on target host)
netapi ise mnt sessions --format json | jq -r '.[] | [.calling_station_id, .user_name, .nas_ip_address] | @tsv' | \
awk -F'\t' '{printf "MAC: %-20s User: %-15s Switch: %s\n", $1, $2, $3}'
# Infrastructure: Extract IP from interface output (second awk strips the /prefix)
ip -4 -o addr show | awk '{print $2, $4}' | awk -F'/' '{print $1}'
# Extract specific columns from ps (user, pid, %cpu, command)
ps aux | awk '{printf "%-10s %5s %5s %s\n", $1, $2, $3, $11}'
## Built-in Variables
# --- awk built-in variables ---
# Record/Field variables
# NR - Current record (line) number (across all files)
# FNR - Current record number in current file (resets at each new file)
# NF - Number of fields in current record
# $0 - Entire current record
# $n - nth field
# Separator variables
# FS - Input field separator (default: whitespace)
# OFS - Output field separator (default: space)
# RS - Input record separator (default: newline)
# ORS - Output record separator (default: newline)
# Print line numbers
awk '{print NR, $0}' file # Line number + content
# Field count per line
awk '{print NR, NF, "fields:", $0}' file
# Process specific line
awk 'NR==5 {print}' file # Only line 5
awk 'NR>=5 && NR<=10' file # Lines 5-10
awk 'NR>1' file # Skip header
# Last field of each line
awk '{print $NF}' file
# Process multiple files, track per-file line number
awk 'FNR==1 {print "=== " FILENAME " ==="} {print FNR, $0}' file1 file2
# Change output separator
# (OFS only takes effect when individual fields are printed or $0 is rebuilt)
awk 'BEGIN {OFS=","} {print $1, $2, $3}' file
awk 'BEGIN {ORS="; "} {print $1}' file # ORS follows EVERY record, incl. the last
# Multi-character field separator (multi-char FS is treated as a regex)
awk 'BEGIN {FS="::"} {print $1, $2}' file
# Infrastructure: Format /etc/passwd as CSV
awk 'BEGIN {FS=":"; OFS=","} {print $1, $3, $6}' /etc/passwd
# Infrastructure: Numbered list of hosts
awk '{printf "%3d. %s\n", NR, $0}' hosts.txt
## Conditionals and Control Flow
# --- Conditionals and control flow in awk ---
# if/else
awk '{if($3 > 100) print "HIGH:", $0; else print "LOW:", $0}' file
# Ternary operator
awk '{status = ($3 > 100) ? "HIGH" : "LOW"; print status, $0}' file
# Multiple conditions
awk '{
  if($1 == "ERROR") print "ERROR:", $0
  else if($1 == "WARN") print "WARNING:", $0
  else print "INFO:", $0
}' file
# next - skip to next record
awk '/skip/ {next} {print}' file # Skip lines with "skip"
# exit - stop processing (an END block, if present, still runs)
awk '/STOP/ {exit} {print}' file # Print until STOP
# exit with code
awk 'END {exit (NR > 100) ? 1 : 0}' file # Exit 1 if >100 lines
# While loop
awk '{i=1; while(i<=NF) {print $i; i++}}' file
# For loop
awk '{for(i=1; i<=NF; i++) print i, $i}' file
# Infrastructure: Categorize log levels
# (awk reads the file directly -- "cat file | awk" was a useless use of cat)
awk '{
  level = "INFO"
  if(/ERROR|FATAL|CRIT/) level = "CRITICAL"
  else if(/WARN|WARNING/) level = "WARNING"
  else if(/DEBUG/) level = "DEBUG"
  print level, $0
}' /var/log/messages
# Infrastructure: Health status
# (netapi is a site-local CLI -- TODO confirm availability on target host)
netapi ise mnt sessions --format json | jq -r '.[] | [.calling_station_id, .authentication_status] | @tsv' | \
awk -F'\t' '{
  if($2 == "AUTHENTICATED") status = "✓"
  else if($2 == "FAILED") status = "✗"
  else status = "?"
  printf "%s %s\n", status, $1
}'
## Infrastructure Patterns
# SSH CA cert status across hosts
# (vault-ssh-test is a local helper -- assumes it prints "=== host ===" section
#  markers and Success/FAILED result lines; TODO confirm its output format)
~/.local/bin/vault-ssh-test 2>&1 | awk '/===/ {host=$2} /Success|FAILED/ {
status = /Success/ ? "✓" : "✗"
printf "%s %-20s\n", status, host
}'
# k8s pod resource summary: sum CPU (millicores) and memory (Mi) per namespace
kubectl top pods -A --no-headers | awk '{
ns=$1; pod=$2
gsub(/m$/, "", $3); cpu=$3 # strip trailing "m" (millicores)
gsub(/Mi$/, "", $4); mem=$4 # strip trailing "Mi"
ns_cpu[ns] += cpu
ns_mem[ns] += mem
ns_count[ns]++
}
END {
printf "%-20s %8s %10s %6s\n", "NAMESPACE", "CPU(m)", "MEM(Mi)", "PODS"
printf "%-20s %8s %10s %6s\n", "---------", "------", "-------", "----"
for(ns in ns_cpu) {
printf "%-20s %8d %10d %6d\n", ns, ns_cpu[ns], ns_mem[ns], ns_count[ns]
}
}'
# ISE session pivot table (by switch and status)
# NOTE: pivot[$1][$2] uses gawk arrays-of-arrays -- not POSIX awk/mawk
netapi ise mnt sessions --format json | jq -r '.[] | [.nas_ip_address, .authentication_status] | @tsv' | \
awk -F'\t' '{
pivot[$1][$2]++
total[$1]++
}
END {
printf "%-20s %10s %10s %10s\n", "SWITCH", "PASSED", "FAILED", "TOTAL"
for(sw in total) {
printf "%-20s %10d %10d %10d\n", sw, pivot[sw]["AUTHENTICATED"]+0, pivot[sw]["FAILED"]+0, total[sw] # +0 turns missing cells into 0
}
}'
# Log analysis: Requests per minute
# Assumes common log format: $4 is "[dd/Mon/yyyy:HH:MM:SS", so t[2]=HH, t[3]=MM
awk '{
split($4, t, ":")
minute = t[2] ":" t[3]
count[minute]++
}
END {
for(m in count) print m, count[m]
}' access.log | sort
# Certificate expiry check: warn on certs expiring within 30 days.
# Emits "<path>\tnotAfter=<date>" per cert so the report names WHICH cert
# is expiring (the original only printed the date, twice).
# NOTE: "date -d" is GNU-only (BSD/macOS need "date -j -f"); systime() is
# gawk/mawk, not POSIX awk.
find /etc/ssl/certs -name "*.pem" -type f 2>/dev/null | while read -r cert; do
  end=$(openssl x509 -in "$cert" -noout -enddate 2>/dev/null) || continue
  printf '%s\t%s\n' "$cert" "$end"
done | awk -F'\t' '{
  split($2, kv, "=")                    # kv[2] = expiry date string
  cmd = "date -d \"" kv[2] "\" +%s"     # shell out to convert date -> epoch
  cmd | getline exp
  close(cmd)                            # close so each record re-runs cmd
  now = systime()
  days = int((exp - now) / 86400)
  if(days < 30) printf "⚠️ %d days: %s (%s)\n", days, $1, kv[2]
}'
# Parse listening port + owning process from ss.
# $6 looks like: users:(("sshd",pid=1234,fd=3)) -- the process name sits
# between the FIRST pair of double quotes, so split on '"'.
# (The old chained gsub was broken: greedy /.*"/ strips through the LAST
# quote, deleting the process name before the second gsub ever runs.)
ss -tlnp | awk 'NR>1 {
  split($4, addr, ":")
  port = addr[length(addr)]   # last component also handles [::]:22 forms
  split($6, q, "\"")          # q[2] = process name
  printf "%-6s %-20s\n", port, q[2]
}' | sort -n
# Vault audit log analysis: count operations and request paths.
# (sudo cat is needed for read permissions; jq/awk then run unprivileged.)
# The original piped the whole output through "sort -k2 -rn | head -20",
# which shuffled the "===" section headers into the sorted data. Sort
# inside awk instead, per section (gawk PROCINFO["sorted_in"]).
sudo cat /var/log/vault/audit.log | jq -r '[.time, .type, .request.operation, .request.path] | @tsv' | \
awk -F'\t' '{
  ops[$3]++
  paths[$4]++
}
END {
  PROCINFO["sorted_in"] = "@val_num_desc"   # gawk: iterate highest count first
  print "=== Operations ==="
  for(op in ops) printf "%-15s %d\n", op, ops[op]
  print "\n=== Top Paths ==="
  n = 0
  for(path in paths) {
    printf "%-40s %d\n", path, paths[path]
    if(++n >= 20) break                     # top 20 paths only
  }
}'
## AWK Gotchas
# --- Common awk pitfalls (several claims corrected during review) ---
# WRONG: Forgetting to quote the awk program
awk {print $1} file # Shell expands $1!
# CORRECT: Always quote
awk '{print $1}' file
# WRONG: Using shell variables inside single quotes
var="pattern"
awk '/$var/ {print}' file # Literal $var, not expanded
# CORRECT: Use -v to pass variables
awk -v pat="$var" '$0 ~ pat {print}' file
# Or use double quotes (but escape $ for awk; injection-prone -- prefer -v)
awk "/$var/ {print}" file
# NOTE (corrected): awk arithmetic is ALWAYS floating point, so there is no
# integer-division surprise; use int() when you want truncation
awk 'BEGIN {print 3/4}' # 0.75 - division is already floating point
awk 'BEGIN {print int(3/4)}' # 0 - int() truncates toward zero
# GOTCHA (refined): a purely numeric field compares numerically, but a field
# with any stray text (e.g. "10ms") falls back to STRING comparison: "10ms" < "9"
awk '$1 > 9' file # may compare as strings if $1 is not purely numeric
# CORRECT: Force numeric comparison
awk '$1+0 > 9' file
# NOTE (corrected): gsub on $0 re-splits the fields automatically, so $1
# below already reflects the substitution; the REAL gotcha is the reverse --
# assigning to any field (e.g. $1="x") rebuilds $0 joined with OFS
awk '{gsub(/old/, "new"); print $1}' file # $1 reflects the NEW (post-gsub) text
# Print the whole modified record:
awk '{gsub(/old/, "new"); print $0}' file
# WRONG: Assuming fields exist
awk '{print $10}' file # Empty if <10 fields
# CORRECT: Check first
awk 'NF >= 10 {print $10}' file
# NOTE (corrected): slashes in a DYNAMIC regex are fine -- this works; the
# real -v gotcha is escape processing ("\t" passed via -v becomes a tab)
awk -v pat="/var/log" '$0 ~ pat' file # Works: pat is a dynamic regex
# index() does a literal substring match (no regex metacharacters at all):
awk -v pat="/var/log" 'index($0, pat)' file
# WRONG: Using = instead of == for comparison
awk '$1 = "value"' file # Assignment, not comparison!
# CORRECT: Use == for comparison
awk '$1 == "value"' file
# WRONG: Expecting associative array order
awk '{a[$1]++} END {for(k in a) print k}' file # Order not guaranteed
# CORRECT: Sort externally, or set PROCINFO["sorted_in"] (gawk only)
awk '{a[$1]++} END {for(k in a) print k}' file | sort