Drill 02: JSON to CSV

Transform JSON to CSV, TSV, and formatted tables.

Run This Drill

bash ~/atelier/_bibliotheca/domus-captures/docs/modules/ROOT/examples/etl-drills/02-json-to-csv.sh

Drill Script

#!/bin/bash
# ETL DRILL 02: JSON TO CSV
# Paste this entire script into your terminal
# Topics: jq @csv, awk formatting, multi-step transforms

echo "=================================================================="
echo "             ETL DRILL 02: JSON TO CSV                           "
echo "=================================================================="
echo ""

# Create test data
cat << 'EOF' > /tmp/inventory.json
{
  "nodes": [
    {"hostname": "ise-01", "ip": "10.50.1.20", "roles": ["pan", "mnt"], "cpu": 45.5, "memory": 8192},
    {"hostname": "ise-02", "ip": "10.50.1.21", "roles": ["psn"], "cpu": 78.2, "memory": 16384},
    {"hostname": "bind-01", "ip": "10.50.1.90", "roles": ["dns"], "cpu": 5.1, "memory": 4096},
    {"hostname": "vault-01", "ip": "10.50.1.132", "roles": ["secrets", "pki"], "cpu": 12.3, "memory": 8192}
  ]
}
EOF

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.1: BASIC jq @csv"
echo "Build array, use @csv filter"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Simple fields to CSV"
echo "jq -r '.nodes[] | [.hostname, .ip, .cpu] | @csv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, .cpu] | @csv' /tmp/inventory.json
echo ""

echo "Command: With header"
echo "(echo 'hostname,ip,cpu'; jq -r '.nodes[] | [.hostname, .ip, .cpu] | @csv' /tmp/inventory.json)"
(echo 'hostname,ip,cpu'; jq -r '.nodes[] | [.hostname, .ip, .cpu] | @csv' /tmp/inventory.json)
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.2: HANDLING ARRAYS IN CSV"
echo "Join array fields for CSV output"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Array to semicolon-separated string"
echo "jq -r '.nodes[] | [.hostname, .ip, (.roles | join(\";\"))] | @csv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, (.roles | join(";"))] | @csv' /tmp/inventory.json
echo ""

echo "Command: Array length instead of content"
echo "jq -r '.nodes[] | [.hostname, .ip, (.roles | length)] | @csv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, (.roles | length)] | @csv' /tmp/inventory.json
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.3: TRANSFORMATIONS"
echo "Calculate, format, and enrich"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Add calculated field (memory in GB)"
echo "jq -r '.nodes[] | [.hostname, .ip, (.memory / 1024 | floor | tostring + \" GB\")] | @csv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, (.memory / 1024 | floor | tostring + " GB")] | @csv' /tmp/inventory.json
echo ""

echo "Command: Conditional field"
echo "jq -r '.nodes[] | [.hostname, (if .cpu > 50 then \"HIGH\" else \"NORMAL\" end)] | @csv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, (if .cpu > 50 then "HIGH" else "NORMAL" end)] | @csv' /tmp/inventory.json
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.4: TSV AND CUSTOM DELIMITERS"
echo "Tab-separated and other formats"
echo "------------------------------------------------------------------"
echo ""
echo "Command: @tsv for tab-separated"
echo "jq -r '.nodes[] | [.hostname, .ip, .cpu] | @tsv' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, .cpu] | @tsv' /tmp/inventory.json
echo ""

echo "Command: Custom delimiter with join"
echo "jq -r '.nodes[] | [.hostname, .ip, .cpu] | join(\" | \")' /tmp/inventory.json"
jq -r '.nodes[] | [.hostname, .ip, .cpu] | join(" | ")' /tmp/inventory.json
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.5: AWK POST-PROCESSING"
echo "When jq isn't enough"
echo "------------------------------------------------------------------"
echo ""
echo "Command: jq to TSV, awk for formatting"
echo "jq -r '.nodes[] | [.hostname, .ip, .cpu] | @tsv' /tmp/inventory.json | awk -F'\\t' '{printf \"%-12s %-15s %6.1f%%\\n\", \$1, \$2, \$3}'"
jq -r '.nodes[] | [.hostname, .ip, .cpu] | @tsv' /tmp/inventory.json | awk -F'\t' '{printf "%-12s %-15s %6.1f%%\n", $1, $2, $3}'
echo ""

echo "Command: jq + awk for summary row"
echo "jq -r '.nodes[] | [.hostname, .cpu] | @tsv' /tmp/inventory.json | awk -F'\\t' '{print; sum+=\$2; count++} END {printf \"AVERAGE: %.1f%%\\n\", sum/count}'"
jq -r '.nodes[] | [.hostname, .cpu] | @tsv' /tmp/inventory.json | awk -F'\t' '{print; sum+=$2; count++} END {printf "AVERAGE: %.1f%%\n", sum/count}'
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "DRILL 2.6: COMPLETE ETL EXAMPLE"
echo "Extract → Transform → Load to file"
echo "------------------------------------------------------------------"
echo ""
echo "Command: Full pipeline with validation"
cat << 'SCRIPT'
#!/bin/bash
INPUT="/tmp/inventory.json"
OUTPUT="/tmp/inventory_report.csv"

# Validate input
if ! jq -e '.nodes' "$INPUT" > /dev/null 2>&1; then
    echo "ERROR: Invalid JSON or missing 'nodes' key" >&2
    exit 1
fi

# Extract and transform
{
    echo "hostname,ip,roles,cpu_status,memory_gb"
    jq -r '.nodes[] | [
        .hostname,
        .ip,
        (.roles | join(";")),
        (if .cpu > 50 then "HIGH" else "NORMAL" end),
        (.memory / 1024 | floor)
    ] | @csv' "$INPUT"
} > "$OUTPUT"

# Verify output
echo "Created $OUTPUT with $(wc -l < "$OUTPUT") lines"
head "$OUTPUT"
SCRIPT
echo ""
echo "Executing..."
INPUT="/tmp/inventory.json"
OUTPUT="/tmp/inventory_report.csv"
{
    echo "hostname,ip,roles,cpu_status,memory_gb"
    jq -r '.nodes[] | [.hostname, .ip, (.roles | join(";")), (if .cpu > 50 then "HIGH" else "NORMAL" end), (.memory / 1024 | floor)] | @csv' "$INPUT"
} > "$OUTPUT"
echo "Created $OUTPUT:"
cat "$OUTPUT"
echo ""

# ---------------------------------------------------------------------------
echo "------------------------------------------------------------------"
echo "YOUR TURN - TRY THESE:"
echo "------------------------------------------------------------------"
echo ""
echo "1. Extract only PSN nodes to CSV:"
echo "   jq -r '.nodes[] | select(.roles | contains([\"psn\"])) | [.hostname, .ip] | @csv' /tmp/inventory.json"
echo ""
echo "2. Generate prometheus scrape config:"
echo "   jq -r '.nodes[] | \"- targets: [\\\"\\(.ip):9090\\\"]\\n  labels:\\n    hostname: \\(.hostname)\"' /tmp/inventory.json"
echo ""
echo "3. Create markdown table:"
echo "   (echo '| Hostname | IP | CPU |'; echo '|---|---|---|'; jq -r '.nodes[] | \"| \\(.hostname) | \\(.ip) | \\(.cpu) |\"' /tmp/inventory.json)"
echo ""
echo "------------------------------------------------------------------"
echo "KEY TAKEAWAYS:"
echo "1. [field1, field2] | @csv for CSV output"
echo "2. (.array | join(\";\")) to flatten arrays"
echo "3. @tsv for tab-separated"
echo "4. Pipe to awk for formatting/aggregation"
echo "5. Validate input before processing"
echo "------------------------------------------------------------------"