Drill 01: Pipeline Basics
Shell pipeline fundamentals: pipes, tee for debugging, process substitution, xargs for command building, command grouping, heredocs, and basic error handling.
Run This Drill
bash ~/atelier/_bibliotheca/domus-captures/docs/modules/ROOT/examples/etl-drills/01-pipeline-basics.sh
Drill Script
#!/bin/bash
# ETL DRILL 01: PIPELINE BASICS
#
# Walkthrough script: each drill prints a command and then runs it against a
# small JSON fixture that this section writes to /tmp/servers.json.
# Usage: run the file with bash, or paste the whole script into a terminal.
# Topics: pipes, tee, process substitution, xargs, command grouping,
# heredocs, error handling.

# Opening banner and a one-line reminder of how ETL maps onto CLI tools.
printf '%s\n' \
  "==================================================================" \
  " ETL DRILL 01: PIPELINE BASICS " \
  "==================================================================" \
  "" \
  "ETL = Extract, Transform, Load" \
  "In CLI: Extract (curl/cat/query) | Transform (jq/awk/sed) | Load (file/db/api)" \
  ""

# Fixture used by the drills: four server records with hostname, ip, cpu and
# status. The quoted delimiter keeps the JSON literal (no shell expansion).
cat > /tmp/servers.json << 'EOF'
{"servers": [
{"hostname": "ise-01", "ip": "10.50.1.20", "cpu": 45, "status": "active"},
{"hostname": "ise-02", "ip": "10.50.1.21", "cpu": 78, "status": "active"},
{"hostname": "bind-01", "ip": "10.50.1.90", "cpu": 5, "status": "active"},
{"hostname": "vault-01", "ip": "10.50.1.132", "cpu": 12, "status": "standby"}
]}
EOF
# ---------------------------------------------------------------------------
# Drill 1.1: a three-stage pipe. The command is printed first, then executed.
cat << 'BANNER'
------------------------------------------------------------------
DRILL 1.1: BASIC PIPE
Each | sends stdout of left to stdin of right
------------------------------------------------------------------

Command: Extract → Transform → Load
cat /tmp/servers.json | jq -r '.servers[].hostname' | sort
BANNER
# cat stays as an explicit "extract" stage so the run matches the command
# printed above.
cat /tmp/servers.json \
  | jq -r '.servers[].hostname' \
  | sort
echo ""
# ---------------------------------------------------------------------------
# Drill 1.2: tee copies the stream to /tmp/stage1.json and /tmp/stage2.json
# at two points while still passing it on to the next stage.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 1.2: TEE - INSPECT PIPELINE STAGES" \
  "tee writes to file AND passes through" \
  "------------------------------------------------------------------" \
  "" \
  "Command: Pipeline with inspection" \
  "cat /tmp/servers.json | tee /tmp/stage1.json | jq '.servers' | tee /tmp/stage2.json | jq '.[].hostname'"
# stage1 receives the raw file contents; stage2 receives the output of the
# first jq filter.
cat /tmp/servers.json \
  | tee /tmp/stage1.json \
  | jq '.servers' \
  | tee /tmp/stage2.json \
  | jq '.[].hostname'
printf '%s\n' \
  "" \
  "Intermediate files created for debugging:" \
  " /tmp/stage1.json - after extract" \
  " /tmp/stage2.json - after first transform" \
  ""
# ---------------------------------------------------------------------------
# Drill 1.3: process substitution hands command output to diff and paste as
# if each were a file argument.
cat << 'BANNER'
------------------------------------------------------------------
DRILL 1.3: PROCESS SUBSTITUTION
Treat command output as a file: <(command)
------------------------------------------------------------------

Command: Compare two command outputs
diff <(jq -r '.servers[].hostname' /tmp/servers.json | sort) <(echo -e 'bind-01\nise-01\nise-02\nvault-01')
BANNER
# Left input: sorted hostnames from the fixture. Right input: the expected list.
diff \
  <(jq -r '.servers[].hostname' /tmp/servers.json | sort) \
  <(echo -e 'bind-01\nise-01\nise-02\nvault-01')
printf '%s\n' "(no output = identical)" ""
printf '%s\n' \
  "Command: Join two data sources" \
  "paste <(jq -r '.servers[].hostname' /tmp/servers.json) <(jq -r '.servers[].ip' /tmp/servers.json)"
# Hostnames and IPs come from two separate jq runs and are merged line by line.
paste \
  <(jq -r '.servers[].hostname' /tmp/servers.json) \
  <(jq -r '.servers[].ip' /tmp/servers.json)
echo ""
# ---------------------------------------------------------------------------
# Drill 1.4: xargs -I {} substitutes each input line into a command template.
# Both examples only echo the generated command (dry run): nothing is pinged
# or fetched.
echo "------------------------------------------------------------------"
echo "DRILL 1.4: XARGS - BUILD COMMANDS FROM INPUT"
echo "Transform input lines into command arguments"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Generate ping commands"
# The executed command uses the same single quotes as the printed one, so
# what the learner reads is exactly what runs.
echo "jq -r '.servers[].ip' /tmp/servers.json | xargs -I {} echo 'ping -c 1 {}'"
jq -r '.servers[].ip' /tmp/servers.json | xargs -I {} echo 'ping -c 1 {}'
echo ""
echo "Command: Parallel execution (dry run)"
# Port 443 is the TLS port, so the generated URL uses https:// rather than
# http://. -P 4 allows up to four echo processes at once, so the order of the
# output lines may vary between runs.
echo "jq -r '.servers[].ip' /tmp/servers.json | xargs -P 4 -I {} echo 'curl -s https://{}:443/health'"
jq -r '.servers[].ip' /tmp/servers.json | xargs -P 4 -I {} echo 'curl -s https://{}:443/health'
echo ""
# ---------------------------------------------------------------------------
# Drill 1.5: ( ) and { } let several commands share one output stream.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 1.5: SUBSHELL GROUPING" \
  "Group commands with ( ) or { }" \
  "------------------------------------------------------------------" \
  "" \
  "Command: Header + data pattern" \
  "(echo 'HOSTNAME,IP,CPU'; jq -r '.servers[] | [.hostname, .ip, .cpu] | @csv' /tmp/servers.json)"
# Subshell form: a CSV header line followed by the rows jq generates.
(
  echo 'HOSTNAME,IP,CPU'
  jq -r '.servers[] | [.hostname, .ip, .cpu] | @csv' /tmp/servers.json
)
printf '%s\n' \
  "" \
  "Command: Multiple transforms, same output file" \
  "{ echo '=== Active Servers ==='; jq -r '.servers[] | select(.status==\"active\") | .hostname' /tmp/servers.json; } > /tmp/active.txt"
# Brace-group form: one redirect captures both commands in /tmp/active.txt.
{
  echo '=== Active Servers ==='
  jq -r '.servers[] | select(.status=="active") | .hostname' /tmp/servers.json
} > /tmp/active.txt
cat /tmp/active.txt
echo ""
# ---------------------------------------------------------------------------
# Drill 1.6: literal JSON is fed into a pipeline from a heredoc, so no temp
# file is needed.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 1.6: HEREDOC IN PIPELINES" \
  "Inline data without temp files" \
  "------------------------------------------------------------------" \
  "" \
  "Command: Inline JSON processing"
# Quoted delimiter: the JSON reaches jq exactly as written.
cat << 'JSONEOF' | jq -r '.name'
{"name": "inline-test", "value": 123}
JSONEOF
printf '%s\n' "" "Command: Multi-stage with heredoc"
# The first jq emits each item; the second slurps them (-s) into one array
# and sorts it by id.
cat << 'JSONEOF' | jq '.items[]' | jq -s 'sort_by(.id)'
{"items": [{"id": 3}, {"id": 1}, {"id": 2}]}
JSONEOF
echo ""
# ---------------------------------------------------------------------------
# Drill 1.7: two error-handling patterns - a || fallback that keeps a
# pipeline producing output, and the set -e / pipefail preamble (printed as a
# reference, not run).
echo "------------------------------------------------------------------"
echo "DRILL 1.7: ERROR HANDLING"
echo "Exit codes and conditional execution"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Continue on error with ||"
# The input path deliberately does not exist, so jq exits non-zero and the ||
# branch supplies the value. Asking jq for a missing key instead (for example
# '.nonexistent' on the fixture) prints null and exits 0, so a || fallback
# would never run. 2>/dev/null hides jq's error message on purpose.
echo "(jq '.servers' /tmp/no-such-file.json 2>/dev/null || echo 'null') | head -1"
(jq '.servers' /tmp/no-such-file.json 2>/dev/null || echo 'null') | head -1
echo ""
echo "Command: Stop on error with set -e (in scripts)"
# Printed verbatim: the quoted delimiter prevents expansion and nothing inside
# the heredoc is executed.
cat << 'SCRIPTEOF'
#!/bin/bash
set -e # Exit on any error
set -o pipefail # Pipe fails if any command fails
cat /tmp/servers.json | jq '.servers' | jq '.[].hostname'
# If any step fails, script exits
SCRIPTEOF
echo "(Script pattern shown - not executed)"
echo ""
# ---------------------------------------------------------------------------
# Closing section: three exercises for the reader and a recap. Everything is
# printed verbatim from a quoted heredoc; none of the commands are executed.
cat << 'OUTRO'
------------------------------------------------------------------
YOUR TURN - TRY THESE:
------------------------------------------------------------------

1. Extract + filter + count:
 cat /tmp/servers.json | jq '.servers[] | select(.cpu > 20)' | jq -s length

2. Generate /etc/hosts entries:
 jq -r '.servers[] | "\(.ip) \(.hostname)"' /tmp/servers.json

3. Pipeline with tee for debugging:
 cat /tmp/servers.json | tee /dev/stderr | jq '.servers | length'

------------------------------------------------------------------
KEY TAKEAWAYS:
1. | pipes stdout to stdin
2. tee writes to file AND passes through
3. <(cmd) creates a file-like object from command output
4. xargs -I {} builds commands from input
5. ( ) or { } groups commands
6. set -e -o pipefail for error handling
------------------------------------------------------------------
OUTRO