AWK

AWK

Attribute Value

Goal

Expert AWK for data processing

Interest Link

Systems Tools > Text Processing

Status

In Progress

Documentation

Codex awk section, domus-linux-ops awk-mastery

Skill Areas

Area Description Status

Field Processing

$1, $2, $NF, FS, OFS

[x] Proficient

Patterns

/regex/, ranges, BEGIN/END

[x] Proficient

Variables

Built-in (NR, NF, FILENAME)

[x] Proficient

Arrays

Associative arrays, loops

[ ] In Progress

Functions

User-defined, string, math

[ ] In Progress

Multi-file

FNR, FILENAME patterns

[ ] In Progress

Field Extraction

# Default: whitespace delimiter (leading/trailing blanks ignored, runs collapse)
awk '{print $1}' file                    # First field
awk '{print $1, $3}' file                # Fields 1 and 3
awk '{print $NF}' file                   # Last field
awk '{print $(NF-1)}' file               # Second to last

# Custom delimiter
awk -F':' '{print $1}' /etc/passwd       # Colon separated
awk -F',' '{print $2}' data.csv          # CSV (naive: breaks on quoted commas)
awk -F'\t' '{print $1}' data.tsv         # Tab separated
awk -F'[,;:]' '{print $1}' file          # Multiple delimiters (bracket-expression regex)

# Output field separator
awk -F':' -v OFS=',' '{print $1, $3, $6}' /etc/passwd

# Field range (print fields 2 through 5)
awk '{for(i=2; i<=5; i++) printf "%s ", $i; print ""}' file

# All fields except first
# (assigning $1 rebuilds $0 with OFS; substr drops the leading separator.
#  Note: original whitespace between fields collapses to single spaces)
awk '{$1=""; print substr($0,2)}' file

# Reformat output
awk -F':' '{printf "%-20s %s\n", $1, $6}' /etc/passwd

# Infrastructure: Parse ISE session fields
netapi ise mnt sessions --format json | jq -r '.[] | [.calling_station_id, .user_name, .nas_ip_address] | @tsv' | \
    awk -F'\t' '{printf "MAC: %-20s User: %-15s Switch: %s\n", $1, $2, $3}'

# Infrastructure: Extract IP from interface output
# ($4 is CIDR like 10.0.0.1/24; the second awk strips the /prefix part)
ip -4 -o addr show | awk '{print $2, $4}' | awk -F'/' '{print $1}'

# Extract specific columns from ps
ps aux | awk '{printf "%-10s %5s %5s %s\n", $1, $2, $3, $11}'

Built-in Variables

# Record/Field variables
# NR  - Current record (line) number (across all files)
# FNR - Current record number in current file
# NF  - Number of fields in current record
# $0  - Entire current record
# $n  - nth field

# Separator variables
# FS  - Input field separator (default: whitespace)
# OFS - Output field separator (default: space)
# RS  - Input record separator (default: newline)
# ORS - Output record separator (default: newline)

# Print line numbers
awk '{print NR, $0}' file                # Line number + content

# Field count per line
awk '{print NR, NF, "fields:", $0}' file

# Process specific line
awk 'NR==5 {print}' file                 # Only line 5
awk 'NR>=5 && NR<=10' file               # Lines 5-10
awk 'NR>1' file                          # Skip header

# Last field of each line
awk '{print $NF}' file

# Process multiple files, track per-file line number
awk 'FNR==1 {print "=== " FILENAME " ==="} {print FNR, $0}' file1 file2

# Change output separator
awk 'BEGIN {OFS=","} {print $1, $2, $3}' file
# NOTE: ORS is emitted after EVERY record, so the last record gets "; " too
awk 'BEGIN {ORS="; "} {print $1}' file   # Semicolon between records

# Multi-character field separator (an FS longer than one char is a regex)
awk 'BEGIN {FS="::"} {print $1, $2}' file

# Infrastructure: Format /etc/passwd as CSV
awk 'BEGIN {FS=":"; OFS=","} {print $1, $3, $6}' /etc/passwd

# Infrastructure: Numbered list of hosts
awk '{printf "%3d. %s\n", NR, $0}' hosts.txt

Conditionals and Control Flow

# if/else
awk '{if($3 > 100) print "HIGH:", $0; else print "LOW:", $0}' file

# Ternary operator
awk '{status = ($3 > 100) ? "HIGH" : "LOW"; print status, $0}' file

# Multiple conditions
awk '{
    if($1 == "ERROR") print "ERROR:", $0
    else if($1 == "WARN") print "WARNING:", $0
    else print "INFO:", $0
}' file

# next - skip to next record
awk '/skip/ {next} {print}' file         # Skip lines with "skip"

# exit - stop processing (an END block, if present, still runs)
awk '/STOP/ {exit} {print}' file         # Print until STOP

# exit with code
awk 'END {exit (NR > 100) ? 1 : 0}' file # Exit 1 if >100 lines

# While loop
awk '{i=1; while(i<=NF) {print $i; i++}}' file

# For loop
awk '{for(i=1; i<=NF; i++) print i, $i}' file

# Infrastructure: Categorize log levels
# (awk reads files directly -- `cat file | awk` just adds a useless process)
awk '{
    level = "INFO"
    if(/ERROR|FATAL|CRIT/) level = "CRITICAL"
    else if(/WARN|WARNING/) level = "WARNING"
    else if(/DEBUG/) level = "DEBUG"
    print level, $0
}' /var/log/messages

# Infrastructure: Health status
netapi ise mnt sessions --format json | jq -r '.[] | [.calling_station_id, .authentication_status] | @tsv' | \
    awk -F'\t' '{
        if($2 == "AUTHENTICATED") status = "✓"
        else if($2 == "FAILED") status = "✗"
        else status = "?"
        printf "%s %s\n", status, $1
    }'

Infrastructure Patterns

# SSH CA cert status across hosts
# (/===/ banner lines carry the hostname in $2; the bare /Success/ inside
#  the ternary below tests the current record $0)
~/.local/bin/vault-ssh-test 2>&1 | awk '/===/ {host=$2} /Success|FAILED/ {
    status = /Success/ ? "✓" : "✗"
    printf "%s %-20s\n", status, host
}'

# k8s pod resource summary
# NOTE(review): assumes CPU is reported in "m" and memory in "Mi";
# kubectl can emit "Gi" for large pods, which would be summed unscaled
# -- verify against this cluster's actual output.
kubectl top pods -A --no-headers | awk '{
    ns=$1; pod=$2
    gsub(/m$/, "", $3); cpu=$3   # strip unit suffix, keep the number
    gsub(/Mi$/, "", $4); mem=$4
    ns_cpu[ns] += cpu
    ns_mem[ns] += mem
    ns_count[ns]++
}
END {
    printf "%-20s %8s %10s %6s\n", "NAMESPACE", "CPU(m)", "MEM(Mi)", "PODS"
    printf "%-20s %8s %10s %6s\n", "---------", "------", "-------", "----"
    for(ns in ns_cpu) {
        printf "%-20s %8d %10d %6d\n", ns, ns_cpu[ns], ns_mem[ns], ns_count[ns]
    }
}'

# ISE session pivot table (by switch and status)
# pivot[$1][$2] (true multidimensional "arrays of arrays") is a gawk
# extension (gawk >= 4.0); portable awk spells it pivot[$1,$2].
netapi ise mnt sessions --format json | jq -r '.[] | [.nas_ip_address, .authentication_status] | @tsv' | \
    awk -F'\t' '{
        pivot[$1][$2]++
        total[$1]++
    }
    END {
        printf "%-20s %10s %10s %10s\n", "SWITCH", "PASSED", "FAILED", "TOTAL"
        for(sw in total) {
            # +0 turns a missing cell into 0 instead of an empty string
            printf "%-20s %10d %10d %10d\n", sw, pivot[sw]["AUTHENTICATED"]+0, pivot[sw]["FAILED"]+0, total[sw]
        }
    }'

# Log analysis: Requests per minute
# assumes common/combined log format where $4 is "[dd/Mon/yyyy:HH:MM:SS",
# so after split on ":" t[2]=hour and t[3]=minute -> one bucket per HH:MM
awk '{
    split($4, t, ":")
    minute = t[2] ":" t[3]
    count[minute]++
}
END {
    for(m in count) print m, count[m]
}' access.log | sort

# Certificate expiry check
# NOTE: `date -d` is GNU date and systime() is a gawk extension.
# IFS= read -r keeps paths with leading blanks/backslashes intact.
find /etc/ssl/certs -name "*.pem" -type f 2>/dev/null | while IFS= read -r cert; do
    openssl x509 -in "$cert" -noout -enddate 2>/dev/null
done | awk -F'=' '{
    # shell out to GNU date to turn "notAfter=<date>" into epoch seconds
    cmd = "date -d \"" $2 "\" +%s"
    cmd | getline exp
    close(cmd)
    now = systime()
    days = int((exp - now) / 86400)
    if(days < 30) printf "⚠️  %d days: %s\n", days, $2
}'

# Parse netstat/ss output (listening port + owning process)
# NOTE: awk regexes are greedy -- gsub(/.*"/,"",$6) would eat through the
# LAST quote of users:(("sshd",pid=...)) and leave ,pid=...)) behind.
# Split on the quote character instead and take the name between quotes.
ss -tlnp | awk 'NR>1 {
    split($4, addr, ":")
    port = addr[length(addr)]       # last component also works for [::]:22
    n = split($6, proc, "\"")
    name = (n >= 2) ? proc[2] : $6  # fall back to raw field (e.g. no -p perms)
    printf "%-6s %-20s\n", port, name
}' | sort -n

# Vault audit log analysis
# Sort each section by count INSIDE awk: piping the whole output through
# one external `sort -k2 -rn | head` would scramble the "=== ... ===" section
# headers into the data lines.
sudo cat /var/log/vault/audit.log | jq -r '[.time, .type, .request.operation, .request.path] | @tsv' | \
    awk -F'\t' '{
        ops[$3]++
        paths[$4]++
    }
    END {
        srt = "sort -k2 -rn"
        top = "sort -k2 -rn | head -20"
        print "=== Operations ==="
        fflush()                    # keep header ahead of the piped output
        for(op in ops) printf "%-15s %d\n", op, ops[op] | srt
        close(srt)                  # close() flushes and reaps the sort
        print "\n=== Top Paths ==="
        fflush()
        for(path in paths) printf "%-40s %d\n", path, paths[path] | top
        close(top)
    }'

AWK Gotchas

# WRONG: Forgetting to quote the awk program
awk {print $1} file                      # Shell expands $1!

# CORRECT: Always quote
awk '{print $1}' file

# WRONG: Using shell variables inside single quotes
var="pattern"
awk '/$var/ {print}' file                # Literal $var, not expanded

# CORRECT: Use -v to pass variables
awk -v pat="$var" '$0 ~ pat {print}' file

# Or use double quotes (fragile: every awk $ and " must be escaped from the shell)
awk "/$var/ {print}" file

# NOTE: awk arithmetic is floating point by default -- there is no
# integer-division trap here (unlike bash's $(( )) )
awk 'BEGIN {print 3/4}'                  # 0.75 - float division by default
awk 'BEGIN {print int(3/4)}'             # 0 - use int() to truncate

# SUBTLE: comparison type depends on the operands. Against the numeric
# constant 9 a field compares numerically, but against the string "9"
# (or another value that does not look numeric) it compares as a STRING,
# where "10" < "9"!
awk '$1 > 9' file                        # numeric here (9 is a number)

# SAFEST: force numeric comparison explicitly
awk '$1+0 > 9' file

# NOTE: gsub on $0 DOES re-split the fields, so $1 below is taken from the
# edited record. The actual gotcha is the reverse direction: assigning to
# any field rebuilds $0 with OFS, collapsing the original spacing.
awk '{gsub(/old/, "new"); print $1}' file  # $1 reflects the post-gsub $0

# Print the whole edited record
awk '{gsub(/old/, "new"); print $0}' file

# WRONG: Assuming fields exist
awk '{print $10}' file                   # Empty if <10 fields

# CORRECT: Check first
awk 'NF >= 10 {print $10}' file

# NOTE: slashes in a dynamic regex are fine ($0 ~ pat works); the real -v
# gotcha is that escape sequences in the VALUE are processed, e.g.
# -v pat="a\tb" delivers a literal tab, not backslash-t
awk -v pat="/var/log" '$0 ~ pat' file    # works: pat is a string, not /re/

# Alternative: substring match sidesteps regex metacharacters entirely
awk -v pat="/var/log" 'index($0, pat)' file

# WRONG: Using = instead of == for comparison
awk '$1 = "value"' file                  # Assignment, not comparison!

# CORRECT: Use == for comparison
awk '$1 == "value"' file

# WRONG: Expecting associative array order
awk '{a[$1]++} END {for(k in a) print k}' file  # Order not guaranteed

# CORRECT: Sort externally or use PROCINFO (gawk)
awk '{a[$1]++} END {for(k in a) print k}' file | sort