Drill 05: Advanced Operations
sort_by, group_by, reduce, and aggregation patterns for data analysis.
Run This Drill
bash ~/atelier/_bibliotheca/domus-captures/docs/modules/ROOT/examples/jq-drills/05-advanced.sh
Drill Script
#!/bin/bash
# JQ DRILL 05: ADVANCED OPERATIONS
# Usage: paste this entire script into your terminal.
# Topics covered: reduce, group_by, unique_by, sort_by, add, math

# Write the sample data that every drill below reads. The quoted heredoc
# delimiter keeps the JSON literal (the shell expands nothing inside it).
cat > /tmp/jq-05.json <<'JSON'
{
"sessions": [
{"user": "admin", "host": "web-01", "duration": 120, "bytes": 45000},
{"user": "admin", "host": "web-02", "duration": 45, "bytes": 12000},
{"user": "alice", "host": "db-01", "duration": 300, "bytes": 89000},
{"user": "bob", "host": "web-01", "duration": 60, "bytes": 23000},
{"user": "alice", "host": "web-01", "duration": 180, "bytes": 56000},
{"user": "admin", "host": "db-01", "duration": 90, "bytes": 34000},
{"user": "bob", "host": "api-01", "duration": 30, "bytes": 8000}
],
"hosts": ["web-01", "web-02", "db-01", "api-01"]
}
JSON

# Title banner followed by the fixture path. printf reuses the '%s\n' format
# once per argument, so every argument becomes exactly one output line.
printf '%s\n' \
  "==================================================================" \
  " JQ DRILL 05: ADVANCED OPERATIONS " \
  "==================================================================" \
  "" \
  "Test file: /tmp/jq-05.json" \
  ""
# ---------------------------------------------------------------------------
# Drill 5.1 - ordering arrays: plain sort on the host list, sort_by on a
# session field, then reverse for descending order. Every step prints the
# command first and runs it straight after.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.1: SORTING" \
  "Sort arrays with sort, sort_by" \
  "------------------------------------------------------------------" \
  ""
printf '%s\n' "Command: jq '.hosts | sort' /tmp/jq-05.json"
jq '.hosts | sort' /tmp/jq-05.json
printf '\n'
printf '%s\n' "Command: jq '.sessions | sort_by(.duration)' /tmp/jq-05.json"
jq '.sessions | sort_by(.duration)' /tmp/jq-05.json
printf '\n'
printf '%s\n' \
  "Command: jq '.sessions | sort_by(.duration) | reverse' /tmp/jq-05.json" \
  "(Descending)"
jq '.sessions | sort_by(.duration) | reverse' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.2 - removing duplicates with unique and unique_by.
# jq's unique returns its result already sorted, so the host example does not
# pipe into sort afterwards; the printed note tells the learner why.
echo "------------------------------------------------------------------"
echo "DRILL 5.2: UNIQUE"
echo "Remove duplicates"
echo "------------------------------------------------------------------"
echo ""
echo "Command: jq '[.sessions[].user] | unique' /tmp/jq-05.json"
jq '[.sessions[].user] | unique' /tmp/jq-05.json
echo ""
echo "Command: jq '[.sessions[].host] | unique' /tmp/jq-05.json"
echo "(unique already returns sorted output - no extra sort needed)"
jq '[.sessions[].host] | unique' /tmp/jq-05.json
echo ""
# unique_by keeps a single session object per distinct .user value.
echo "Command: jq '.sessions | unique_by(.user)' /tmp/jq-05.json"
echo "(First occurrence of each user)"
jq '.sessions | unique_by(.user)' /tmp/jq-05.json
echo ""
# ---------------------------------------------------------------------------
# Drill 5.3 - group_by(.user) on the sessions array; the second command maps
# each group to a {user, count} object built from its first element and
# its length.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.3: GROUPING" \
  "group_by creates nested arrays" \
  "------------------------------------------------------------------" \
  ""
printf '%s\n' "Command: jq '.sessions | group_by(.user)' /tmp/jq-05.json"
jq '.sessions | group_by(.user)' /tmp/jq-05.json
printf '\n'
printf '%s\n' \
  "Command: jq '.sessions | group_by(.user) | map({user: .[0].user, count: length})' /tmp/jq-05.json"
jq '.sessions | group_by(.user) | map({user: .[0].user, count: length})' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.4 - summing with add: all durations, all byte counts, and then the
# durations of the sessions selected by .user == "admin".
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.4: AGGREGATION WITH add" \
  "Sum arrays of numbers" \
  "------------------------------------------------------------------" \
  ""
printf '%s\n' "Command: jq '[.sessions[].duration] | add' /tmp/jq-05.json"
jq '[.sessions[].duration] | add' /tmp/jq-05.json
printf '\n'
printf '%s\n' "Command: jq '[.sessions[].bytes] | add' /tmp/jq-05.json"
jq '[.sessions[].bytes] | add' /tmp/jq-05.json
printf '\n'
# Quoted heredoc: the embedded double quotes are shown as-is, no escaping.
cat <<'SHOW'
Command: jq '[.sessions[] | select(.user == "admin") | .duration] | add' /tmp/jq-05.json
(Sum for admin only)
SHOW
jq '[.sessions[] | select(.user == "admin") | .duration] | add' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.5 - numeric summaries over the list of durations: min, max, and an
# average computed as add / length.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.5: MATH OPERATIONS" \
  "min, max, add (sum), length (count)" \
  "------------------------------------------------------------------" \
  ""
printf '%s\n' "Command: jq '[.sessions[].duration] | min' /tmp/jq-05.json"
jq '[.sessions[].duration] | min' /tmp/jq-05.json
printf '\n'
printf '%s\n' "Command: jq '[.sessions[].duration] | max' /tmp/jq-05.json"
jq '[.sessions[].duration] | max' /tmp/jq-05.json
printf '\n'
printf '%s\n' \
  "Command: Calculate average duration" \
  "jq '[.sessions[].duration] | add / length' /tmp/jq-05.json"
jq '[.sessions[].duration] | add / length' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.6 - reduce: first fold the durations into a single sum starting
# from 0, then fold into an object keyed by user, starting from {}.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.6: REDUCE" \
  "Accumulate values (like fold/inject)" \
  "------------------------------------------------------------------" \
  ""
# Quoted heredocs print the jq variable $s literally; the shell never
# expands it, so no backslash escaping is needed in the displayed command.
cat <<'SHOW'
Command: jq 'reduce .sessions[] as $s (0; . + $s.duration)' /tmp/jq-05.json
(Sum durations)
SHOW
jq 'reduce .sessions[] as $s (0; . + $s.duration)' /tmp/jq-05.json
printf '\n'
cat <<'SHOW'
Command: Build user -> total duration map
jq 'reduce .sessions[] as $s ({}; .[$s.user] += $s.duration)' /tmp/jq-05.json
SHOW
jq 'reduce .sessions[] as $s ({}; .[$s.user] += $s.duration)' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.7 - "top N": sort by bytes, reverse, slice the first three entries
# and format each one as a text line (jq -r prints raw strings).
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.7: TOP N PATTERN" \
  "Sort, limit, format" \
  "------------------------------------------------------------------" \
  ""
# Quoted heredoc: jq's \( ... ) interpolation and the inner double quotes
# are displayed verbatim without any shell escaping.
cat <<'SHOW'
Command: jq -r '.sessions | sort_by(.bytes) | reverse | .[0:3] | .[] | "\(.user)@\(.host): \(.bytes) bytes"' /tmp/jq-05.json
(Top 3 by bytes)
SHOW
jq -r '.sessions | sort_by(.bytes) | reverse | .[0:3] | .[] | "\(.user)@\(.host): \(.bytes) bytes"' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Drill 5.8 - per-user statistics: group the sessions by user and build one
# summary object per group (session count, summed duration, summed bytes).
# The command is displayed over several lines but executed as one line.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 5.8: STATISTICS BY GROUP" \
  "Combine grouping with aggregation" \
  "------------------------------------------------------------------" \
  ""
cat <<'SHOW'
Command:
jq '.sessions | group_by(.user) | map({
user: .[0].user,
session_count: length,
total_duration: [.[].duration] | add,
total_bytes: [.[].bytes] | add
})' /tmp/jq-05.json
SHOW
jq '.sessions | group_by(.user) | map({user: .[0].user, session_count: length, total_duration: [.[].duration] | add, total_bytes: [.[].bytes] | add})' /tmp/jq-05.json
printf '\n'
# ---------------------------------------------------------------------------
# Closing section: three practice exercises (printed only, never executed)
# and a recap list of the filters used in this drill.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "YOUR TURN - TRY THESE:" \
  "------------------------------------------------------------------" \
  ""
printf '%s\n' \
  "1. Find user with most total bytes:" \
  " jq '.sessions | group_by(.user) | map({user: .[0].user, bytes: [.[].bytes] | add}) | sort_by(.bytes) | last' /tmp/jq-05.json" \
  ""
printf '%s\n' \
  "2. Count sessions per host:" \
  " jq '.sessions | group_by(.host) | map({host: .[0].host, count: length})' /tmp/jq-05.json" \
  ""
printf '%s\n' \
  "3. Average bytes per session:" \
  " jq '[.sessions[].bytes] | add / length | floor' /tmp/jq-05.json" \
  ""
# Single-quoted arguments: the $x in item 6 is printed literally.
printf '%s\n' \
  '------------------------------------------------------------------' \
  'KEY TAKEAWAYS:' \
  '1. sort_by(.field) - sort by field' \
  '2. unique / unique_by - deduplicate' \
  '3. group_by(.field) - create nested groups' \
  '4. add - sum arrays' \
  '5. min/max - extremes' \
  '6. reduce .[] as $x (init; update) - accumulator' \
  '7. .[0:N] - slice first N elements' \
  '------------------------------------------------------------------'