jq Data Transformation

This is senior-level jq. Data pivoting, time series analysis, set operations, and patterns that turn messy data into structured insights.


Data Pivoting

Long to Wide Format

Convert repeated key-value pairs into a single object:

# Input: Array of measurements
# [
#   {"host": "server-a", "metric": "cpu", "value": 50},
#   {"host": "server-a", "metric": "mem", "value": 70},
#   {"host": "server-b", "metric": "cpu", "value": 30}
# ]

# Output: Pivoted by host
# {"server-a": {"cpu": 50, "mem": 70}, "server-b": {"cpu": 30}}

jq 'group_by(.host) | map({
  (.[0].host): (map({(.metric): .value}) | add)
}) | add'

Wide to Long Format

Flatten nested structure into rows:

# Input: Nested object
# {"server-a": {"cpu": 50, "mem": 70}, "server-b": {"cpu": 30}}

# Output: Flattened rows
# [{"host": "server-a", "metric": "cpu", "value": 50}, ...]

jq 'to_entries | map(
  .key as $host |
  .value | to_entries | map({host: $host, metric: .key, value: .value})
) | flatten'

Cross-Tab (Pivot Table)

Create a pivot table showing counts:

# Input: Events with category and severity
# [{"category": "auth", "severity": "high"}, ...]

# Output: Category x Severity matrix
jq 'group_by(.category) | map({
  category: .[0].category,
  high: [.[] | select(.severity == "high")] | length,
  medium: [.[] | select(.severity == "medium")] | length,
  low: [.[] | select(.severity == "low")] | length
})'

Time Series Operations

Calculate Time Deltas

# Time between consecutive events
jq '[., .[1:]] | transpose | map(
  if .[1] then
    {
      from: .[0].timestamp,
      to: .[1].timestamp,
      delta_seconds: ((.[1].timestamp | fromdateiso8601) - (.[0].timestamp | fromdateiso8601))
    }
  else empty end
)'

Gap Detection

Find missing data points in time series:

# Find gaps > 5 minutes in time series
jq '[., .[1:]] | transpose | map(
  select(
    .[1] != null and
    (.[1].timestamp | fromdateiso8601) - (.[0].timestamp | fromdateiso8601) > 300
  )
) | map({
  from: .[0].timestamp,
  to: .[1].timestamp,
  gap_seconds: ((.[1].timestamp | fromdateiso8601) - (.[0].timestamp | fromdateiso8601))
})'

Note the `.[1] != null` guard: `transpose` pads the shorter array with null, so the final pair is `[last_event, null]` and `fromdateiso8601` would error on it.

Rolling Window Average

# 5-point rolling average
jq '. as $data |
  [range(4; length) | . as $i | $data[($i-4):($i+1)] | {
    timestamp: .[-1].timestamp,
    avg_value: ([.[] | .value] | add / length)
  }]' data.json

(Bind the array with `. as $data` rather than calling `input` — `input` reads the *next* JSON value from the input stream, which fails with "No more inputs" when data.json contains a single array.)

Bucket by Time Period

# Group by hour
jq 'group_by(.timestamp | fromdateiso8601 | . / 3600 | floor) | map({
  hour: (.[0].timestamp | fromdateiso8601 | strftime("%Y-%m-%d %H:00")),
  count: length,
  avg_value: ([.[] | .value] | add / length)
})'

# Group by day
jq 'group_by(.timestamp | split("T")[0]) | map({
  date: .[0].timestamp | split("T")[0],
  count: length,
  total: [.[] | .value] | add
})'

Time Range Filter

# Events in last 24 hours
jq --arg start "$(date -d '24 hours ago' -Iseconds)" '
  .[] | select(.timestamp >= $start)
'

# Events between dates
jq --arg start "2026-02-01" --arg end "2026-02-28" '
  .[] | select(
    (.timestamp | split("T")[0]) >= $start and
    (.timestamp | split("T")[0]) <= $end
  )
'

Set Operations

Find Added/Removed Items

Compare two snapshots:

# Items in new but not old (added)
jq -n --slurpfile old old.json --slurpfile new new.json '
  ($new[0] | map(.id)) - ($old[0] | map(.id)) |
  . as $added |
  $new[0] | map(select(.id | IN($added[])))
'

# Items in old but not new (removed)
jq -n --slurpfile old old.json --slurpfile new new.json '
  ($old[0] | map(.id)) - ($new[0] | map(.id)) |
  . as $removed |
  $old[0] | map(select(.id | IN($removed[])))
'

# Items changed (same ID, different content)
jq -n --slurpfile old old.json --slurpfile new new.json '
  ($old[0] | INDEX(.id)) as $oldIndex |
  $new[0] | map(select(
    .id as $id |
    $oldIndex[$id] != null and $oldIndex[$id] != .
  ))
'

Intersection

# Items in both arrays
jq -n --slurpfile a a.json --slurpfile b b.json '
  ($a[0] | map(.id)) as $a_ids |
  ($b[0] | map(.id)) as $b_ids |
  $a_ids | map(select(IN($b_ids[])))
'

(Use `IN` for exact membership — `contains([$id])` does recursive containment, which on strings means *substring* matching, so id "ab" would falsely match "abc".)

Symmetric Difference

# Items in one but not both
jq -n --slurpfile a a.json --slurpfile b b.json '
  (($a[0] | map(.id)) - ($b[0] | map(.id))) +
  (($b[0] | map(.id)) - ($a[0] | map(.id)))
'

Hierarchical Data

Flatten with Path

Flatten any depth, preserving the path to each value:

# Input: {"a": {"b": {"c": 1}}, "d": [2, 3]}
# Output: [{"path": "a.b.c", "value": 1}, {"path": "d.0", "value": 2}, ...]

jq '[paths(scalars)] as $paths |
  [$paths[] | {path: (. | map(tostring) | join(".")), value: getpath(.)}]'

Unflatten from Path

Rebuild nested structure:

# Input: [{"path": "a.b.c", "value": 1}]
# Output: {"a": {"b": {"c": 1}}}

jq 'reduce .[] as $item ({};
  setpath($item.path | split(".") | map(tonumber? // .); $item.value)
)'

Tree to Flat List with Depth

# Recursive flatten with depth tracking
jq 'def flatten_tree(depth):
  . as $node |
  {name: $node.name, depth: depth},
  ($node.children[]? | flatten_tree(depth + 1));
flatten_tree(0)'

Conditional Logic

Multi-Level Case Statement

jq '.[] | .severity_label = (
  if .priority < 3 then "critical"
  elif .priority < 5 then "high"
  elif .priority < 7 then "medium"
  else "low"
  end
)'

Null-Safe Deep Access

# Chain with defaults at each level
jq '.config?.network?.dns?.servers? // .defaults?.dns? // ["8.8.8.8"]'

# With fallback object
jq '.data // {error: "no data", fallback: true}'

Conditional Field Addition

# Add field only if condition met
jq '.[] |
  if .status == "active" then
    . + {needs_review: false}
  else
    . + {needs_review: true, inactive_since: .last_seen}
  end
'

Aggregation Patterns

Multiple Aggregations

# Stats summary
jq '{
  count: length,
  sum: [.[] | .value] | add,
  avg: ([.[] | .value] | add / length),
  min: [.[] | .value] | min,
  max: [.[] | .value] | max,
  distinct: [.[] | .category] | unique | length
}'

Group and Aggregate

# Group by category, compute stats per group
jq 'group_by(.category) | map({
  category: .[0].category,
  count: length,
  total: [.[] | .value] | add,
  avg: ([.[] | .value] | add / length),
  items: [.[] | .name]
})'

Running Total

# Cumulative sum
jq 'reduce .[] as $item ([]; . + [{
  date: $item.date,
  value: $item.value,
  cumulative: ((.[- 1].cumulative // 0) + $item.value)
}])'

Batching and Chunking

Split Array into Batches

# Split into chunks of 100
jq 'def batch(n): . as $a | [range(0; length; n) | $a[. : . + n]]; batch(100)' data.json

# Using _nwise (jq 1.6+; underscore-prefixed internals are undocumented and may change between releases)
jq '[_nwise(100)]'

Process with Progress

#!/bin/bash
# Stream a large JSON array item-by-item, printing a progress counter to stderr.
set -euo pipefail

TOTAL=$(jq 'length' huge.json)

# nl numbers each compact JSON line; variables set inside the pipeline's
# while-loop subshell don't need to survive past it, so this is safe.
jq -c '.[]' huge.json | nl | while read -r num item; do
  # read -r keeps backslash escapes inside the JSON intact (JSON strings
  # routinely contain them); printf avoids echo's handling of leading
  # dashes and backslashes.
  printf '%s\n' "$item" | jq -c '{processed: .id}'
  printf '\r%d/%d' "$num" "$TOTAL" >&2
done
echo >&2

String Manipulation

Parse Structured Strings

# Split and extract
jq '.fqdn | split(".") | {host: .[0], domain: .[1:] | join(".")}'

# Regex capture groups
jq '.log_line | capture("(?<timestamp>\\d{4}-\\d{2}-\\d{2}) (?<level>\\w+): (?<message>.*)")'

Build Strings from Data

# Interpolation
jq '"Host \(.name) at \(.ip) has \(.status) status"'

# Join array
jq '.tags | join(", ")'

# Multiline output
jq -r '"Name: \(.name)\nIP: \(.ip)\nStatus: \(.status)"'

Clean and Normalize

# Trim whitespace, lowercase, remove special chars
jq '.name | gsub("^\\s+|\\s+$"; "") | ascii_downcase | gsub("[^a-z0-9-]"; "-")'

# Truncate
jq '.description | .[:100] + (if length > 100 then "..." else "" end)'

Object Manipulation

Merge Objects

# Shallow merge (second wins) — note -n: with no input file, jq would otherwise block waiting on stdin
jq -n --argjson defaults '{"a":1,"b":2}' --argjson overrides '{"b":3,"c":4}' '$defaults + $overrides'

# Deep merge (recurses into nested objects)
jq -n --argjson defaults '{"a":{"x":1}}' --argjson overrides '{"a":{"y":2}}' '$defaults * $overrides'

Rename Keys

# Single key
jq '.[] | {server_name: .name, server_ip: .ip}'

# Multiple keys via transformation
jq 'with_entries(
  if .key == "old_name" then .key = "new_name" else . end
)'

Remove/Keep Keys

# Remove specific keys
jq 'del(.password, .secret, .token)'

# Keep only specific keys
jq '{name, ip, status}'

# Remove keys matching pattern
jq 'with_entries(select(.key | test("^_") | not))'

Debugging Complex Queries

Progressive Refinement

# Build incrementally
cat data.json | jq 'type'
cat data.json | jq 'keys'
cat data.json | jq '.data | type'
cat data.json | jq '.data | keys'
cat data.json | jq '.data[0]'
cat data.json | jq '.data[] | select(.active)'
cat data.json | jq '.data[] | select(.active) | {name, ip}'

Debug Output

# Print intermediate values
jq '.[] | . as $item | debug | select(.active) | $item.name'

# Conditional debug
jq '.[] | if .status == "error" then (. | debug) else . end | .name'

# Print type at each step
jq '.data | (type | debug) | .[] | (type | debug) | .name'

Common Errors

Error Fix

null has no keys

Use optional ? operator: .missing?.field?

Cannot iterate over null

Provide default: (.array // [])[]

Cannot index string with string

Check type - you’re treating string as object

Output is escaped JSON

Add -r flag for raw strings

Unexpected token

Check for shell expansion - use single quotes


Performance Tips

Stream Large Files

# Don't load entire file into memory
jq --stream 'select(.[0][-1] == "name") | .[1]' huge.json

First Match Only

# Stop after first match
jq 'first(.[] | select(.status == "active"))'

Limit Output

jq '.[0:10]' huge_array.json

Use Index for Lookups

# Build index once, lookup many
jq 'INDEX(.id) as $index | $ids[] | $index[.]'

Ansible Inventory Generation

Generate dynamic inventory from multiple sources.

From ISE Endpoints

# Generate Ansible inventory from ISE endpoints
netapi ise --format json get-endpoints | jq '
  {
    linux_hosts: {
      hosts: [
        .[] | select(.profile | test("Linux"; "i")) |
        {
          (.mac | gsub(":"; "-")): {
            ansible_host: .ip,
            ise_profile: .profile,
            endpoint_group: .groupAssignment
          }
        }
      ] | add
    },
    windows_hosts: {
      hosts: [
        .[] | select(.profile | test("Windows"; "i")) |
        {
          (.mac | gsub(":"; "-")): {
            ansible_host: .ip,
            ise_profile: .profile
          }
        }
      ] | add
    }
  }
'

# Include group vars
netapi ise --format json get-endpoints | jq '
  group_by(.groupAssignment) |
  map({
    key: (.[0].groupAssignment | gsub(" "; "_") | ascii_downcase),
    value: {
      hosts: ([.[] | {(.mac): {ansible_host: .ip}}] | add),
      vars: {source: "ise", group: .[0].groupAssignment}
    }
  }) |
  from_entries
'

From Wazuh Agents

# Build inventory from Wazuh agent list
curl -s -k -u "$WAZUH_USER:$WAZUH_PASS" \
  "https://${WAZUH_API}/agents?limit=1000" | jq '
  .data.affected_items |
  group_by(.os.platform) |
  map({
    key: (.[0].os.platform // "unknown"),
    value: {
      hosts: ([.[] | select(.status == "active") | {(.name): {ansible_host: .ip, wazuh_id: .id}}] | add)
    }
  }) |
  from_entries
'

Merge Multiple Sources

# Combine ISE, Wazuh, and static inventory
jq -n --slurpfile ise ise_inventory.json \
      --slurpfile wazuh wazuh_inventory.json \
      --slurpfile static static_inventory.json '
  # Deep merge all inventories
  $static[0] * $ise[0] * $wazuh[0] |
  # Add metadata
  . + {
    _meta: {
      hostvars: (
        [.[] | .hosts? // {} | to_entries[]] |
        group_by(.key) |
        map({key: .[0].key, value: ([.[].value] | add)}) |
        from_entries
      )
    }
  }
'

Validate Inventory

# Check for hosts without ansible_host
cat inventory.json | jq '
  [.. | objects | select(has("hosts")) | .hosts | to_entries[]] |
  map(select(.value.ansible_host == null)) |
  map(.key)
'

# Find duplicate hosts across groups (one name per duplicate)
jq '
  [.. | objects | select(has("hosts")) | .hosts | keys[]] |
  group_by(.) | map(select(length > 1) | .[0])
' inventory.json

Compliance Drift Detection

Compare configurations before and after changes.

Config Snapshot Comparison

# Compare two ISE policy exports
jq -n --slurpfile before before_policies.json --slurpfile after after_policies.json '
  def diff_objects:
    ($before[0] | INDEX(.name)) as $before_idx |
    ($after[0] | INDEX(.name)) as $after_idx |
    {
      added: [($after[0] | map(.name)) - ($before[0] | map(.name)) | .[] | {name: ., detail: $after_idx[.]}],
      removed: [($before[0] | map(.name)) - ($after[0] | map(.name)) | .[] | {name: ., detail: $before_idx[.]}],
      modified: [
        $after[0][] |
        .name as $name |
        select($before_idx[$name] != null and $before_idx[$name] != .) |
        {
          name: $name,
          changes: (
            (. | to_entries | map({key, new: .value})) as $new |
            ($before_idx[$name] | to_entries | map({key, old: .value})) as $old |
            ($new + $old) | group_by(.key) | map(add) |
            map(select(.old != .new))
          )
        }
      ]
    };
  diff_objects
'

# Summary of drift
jq -n --slurpfile before before.json --slurpfile after after.json '
  {
    added: (($after[0] | map(.id)) - ($before[0] | map(.id)) | length),
    removed: (($before[0] | map(.id)) - ($after[0] | map(.id)) | length),
    total_before: ($before[0] | length),
    total_after: ($after[0] | length)
  }
'

Authorization Profile Drift

# Compare dACL assignments
jq -n --slurpfile baseline baseline_profiles.json --slurpfile current current_profiles.json '
  ($baseline[0] | INDEX(.name)) as $base |
  $current[0][] |
  select($base[.name] != null) |
  select($base[.name].daclName != .daclName) |
  {
    profile: .name,
    baseline_dacl: $base[.name].daclName,
    current_dacl: .daclName,
    drift: "DACL_CHANGED"
  }
'

# Check for new profiles not in baseline
jq -n --slurpfile baseline baseline.json --slurpfile current current.json '
  ($baseline[0] | map(.name)) as $base_names |
  [$current[0][] | select(.name | IN($base_names[]) | not)] |
  map({name, created_after_baseline: true})
'

Network ACL Drift

# Compare firewall rules
jq -n --slurpfile before before_rules.json --slurpfile after after_rules.json '
  def rule_key: "\(.source)_\(.destination)_\(.port)_\(.action)";

  ($before[0] | map({key: rule_key, value: .}) | from_entries) as $before_map |
  ($after[0] | map({key: rule_key, value: .}) | from_entries) as $after_map |

  {
    rules_added: [($after_map | keys) - ($before_map | keys) | .[] | $after_map[.]],
    rules_removed: [($before_map | keys) - ($after_map | keys) | .[] | $before_map[.]],
    total_drift: ((($after_map | keys) - ($before_map | keys)) + (($before_map | keys) - ($after_map | keys))) | length
  }
'

Generate Drift Report

# Full drift report with timestamps
jq -n \
  --arg baseline_date "$(stat -c %y baseline.json | cut -d' ' -f1)" \
  --arg current_date "$(date +%Y-%m-%d)" \
  --slurpfile baseline baseline.json \
  --slurpfile current current.json '
  {
    report_generated: (now | strftime("%Y-%m-%d %H:%M:%S")),
    baseline_date: $baseline_date,
    current_date: $current_date,
    summary: {
      items_in_baseline: ($baseline[0] | length),
      items_current: ($current[0] | length),
      net_change: (($current[0] | length) - ($baseline[0] | length))
    },
    changes: {
      added: (($current[0] | map(.id)) - ($baseline[0] | map(.id))),
      removed: (($baseline[0] | map(.id)) - ($current[0] | map(.id)))
    }
  }
'

Continuous Compliance Check

#!/bin/bash
# compliance-check.sh - Run daily, alert on drift between the saved baseline
# and the current ISE policy-set state. Exits 1 on drift, 2 on missing baseline.
set -euo pipefail

BASELINE="$HOME/.compliance/baseline.json"

# Fail fast with a clear message instead of letting jq --slurpfile error cryptically.
if [[ ! -f "$BASELINE" ]]; then
  echo "No baseline at $BASELINE - capture a baseline first" >&2
  exit 2
fi

# Use an unpredictable temp file and clean it up on any exit path.
CURRENT=$(mktemp)
trap 'rm -f -- "$CURRENT"' EXIT

# Fetch current state
netapi ise --format json get-policy-sets > "$CURRENT"

# Drift = count of names present in only one of the two snapshots.
DRIFT=$(jq -n --slurpfile b "$BASELINE" --slurpfile c "$CURRENT" '
  {
    added: (($c[0] | map(.name)) - ($b[0] | map(.name)) | length),
    removed: (($b[0] | map(.name)) - ($c[0] | map(.name)) | length)
  } | .added + .removed
')

if [[ "$DRIFT" -gt 0 ]]; then
  echo "COMPLIANCE DRIFT DETECTED: $DRIFT changes since baseline"
  # Detail: list the actual added/removed names for the alert consumer.
  jq -n --slurpfile b "$BASELINE" --slurpfile c "$CURRENT" '
    {
      added: (($c[0] | map(.name)) - ($b[0] | map(.name))),
      removed: (($b[0] | map(.name)) - ($c[0] | map(.name)))
    }
  '
  exit 1
fi

echo "Compliance check passed - no drift detected"