RCA-2026-02-001: Fix Applied

Resolution

Solution: Libvirt Hook Script

Libvirt hooks execute on VM lifecycle events. The qemu hook configures the VLANs AND the PVID when a VM starts.

Production Hook (v2.0)

Key improvements over initial implementation:

Feature Description

MAC-based vnet matching

Matches VM MAC addresses from XML to vnet peer MACs via sysfs - scopes to this VM only

Race condition fix

Initial hook enumerated ALL vnets on br-mgmt - caused conflicts during simultaneous VM starts

Polling with timeout

Replaces fragile sleep 3 with configurable wait (15 seconds default)

Error tracking

Counts and reports errors at completion

Per-VM logging

logger -t "libvirt-hook[${GUEST_NAME}]" for filtered journald queries

/etc/libvirt/hooks/qemu
#!/bin/bash
# /etc/libvirt/hooks/qemu
# Configures 802.1Q VLANs and PVID on br-mgmt vnet interfaces at VM start
# NOTE: Never use virsh here — deadlocks libvirtd
#
# INSTALL:
#   sudo cp libvirt-vlan-hook.sh /etc/libvirt/hooks/qemu
#   sudo chmod 755 /etc/libvirt/hooks/qemu
#   sudo systemctl restart libvirtd
#
# VERIFY:
#   journalctl -t "libvirt-hook[<vm-name>]" -f
#
# BUG FIX: Naive enumeration of all vnets on br-mgmt causes race condition
# during simultaneous VM starts. Fixed by matching VM MAC addresses from
# XML to vnet peer MACs via sysfs — scoped to this VM only.

# Positional arguments passed to this hook:
#   $1 — guest name (used in the syslog tag and to locate the guest's XML)
#   $2 — operation; selects the branch of the case statement at the end of
#        this script (started, begin, stopped, reconnect)
GUEST_NAME="$1"
OPERATION="$2"

# Bridge whose vnet ports are inspected and configured.
BRIDGE="br-mgmt"
# VLAN IDs added to every vnet that belongs to the guest.
VLANS="10 20 30 40 100 110 120"
VNET_WAIT_SECS=15   # max seconds to wait for vnets to attach

# Guests whose vnets get PVID 100. Names are separated by spaces or newlines
# and are compared against $1 as whole names. Any guest not listed here is
# configured with PVID 1 and a warning is logged.
PVID100_VMS="vyos-01 vyos-02
9800-WLC-01 9800-WLC-02
ise-01 ise-02
bind-01 bind-02
home-dc01 home-dc02
keycloak-01 keycloak-02
ipsk-manager ipsk-manager-01 ipsk-mgr-01 ipsk-manager-02
vault-01 vault-02 vault-03
ipa-01 ipa-02
k3s-master-01 k3s-master-02 k3s-master-03
k3s-worker-01 k3s-worker-02 k3s-worker-03"

# Syslog helpers. Each joins its arguments into a single message and hands it
# to logger under the per-guest tag "libvirt-hook[<guest name>]".
# warn and err prepend "WARN: " / "ERROR: " to the message text.
log() {
    local tag="libvirt-hook[${GUEST_NAME}]"
    logger -t "$tag" "$*"
}

warn() {
    local tag="libvirt-hook[${GUEST_NAME}]"
    logger -t "$tag" "WARN: $*"
}

err() {
    local tag="libvirt-hook[${GUEST_NAME}]"
    logger -t "$tag" "ERROR: $*"
}

# Report whether a guest is on the PVID-100 list.
# Globals:   PVID100_VMS (read) — guest names separated by spaces/newlines
# Arguments: $1 — guest name
# Returns:   0 if $1 is exactly one of the listed names, non-zero otherwise
needs_pvid100() {
    # An empty name can never be a list member.
    [[ -n "$1" ]] || return 1

    # One name per line, then a whole-line match.
    #   -F  compares the name literally, so '.' or '*' in a guest name is not
    #       treated as a regular expression (e.g. "vyos.01" must not match
    #       "vyos-01").
    #   --  keeps a name that begins with '-' from being parsed as an option.
    printf '%s\n' "$PVID100_VMS" | tr -s ' ' '\n' | grep -qxF -- "$1"
}

# Print the vnet interfaces on $BRIDGE that belong to one guest.
#
# The guest's MAC addresses are read from its XML under /etc/libvirt/qemu and
# compared with the address of every vnet currently enslaved to $BRIDGE
# (read from /sys/class/net/<vnet>/address). The comparison skips the first
# three characters (the first octet and its colon) on both sides —
# presumably because the host-side vnet MAC differs from the guest MAC only
# in that octet; confirm against the hypervisor's behaviour.
#
# Globals:   BRIDGE (read)
# Arguments: $1 — guest name
# Outputs:   one matching vnet name per line on stdout
# Returns:   0 if at least one vnet matched; 1 if the XML is missing, holds
#            no MAC addresses, or nothing matched
get_vm_vnets() {
    local guest="$1"
    local xml="/etc/libvirt/qemu/${guest}.xml"

    if [[ ! -f "$xml" ]]; then
        err "VM XML not found: $xml"
        return 1
    fi

    # Accept upper- and lower-case hex so a MAC is never cut short at the
    # first upper-case digit.
    local macs
    macs=$(grep -oP "(?<=<mac address=[\"'])[0-9A-Fa-f:]+" "$xml")

    if [[ -z "$macs" ]]; then
        warn "No MAC addresses found in $xml"
        return 1
    fi

    # The set of bridge ports does not depend on the MAC being checked, so
    # list it once instead of once per MAC.
    local bridge_vnets
    bridge_vnets=$(ip link show master "$BRIDGE" 2>/dev/null \
                   | awk -F'[ :]+' '/vnet/{print $2}')

    local mac suffix vnet vnet_mac
    local found=0
    # Word-splitting is intentional: both lists are whitespace-separated
    # tokens (MAC addresses / interface names).
    for mac in $macs; do
        suffix="${mac:3}"
        suffix="${suffix,,}"
        for vnet in $bridge_vnets; do
            vnet_mac=$(cat /sys/class/net/"$vnet"/address 2>/dev/null)
            # An unreadable address (interface vanished) must never count as
            # a match.
            [[ -n "$vnet_mac" ]] || continue
            vnet_mac="${vnet_mac,,}"
            if [[ "${vnet_mac:3}" == "$suffix" ]]; then
                echo "$vnet"
                found=$(( found + 1 ))
            fi
        done
    done

    (( found > 0 ))
}

# Apply the VLAN layout to a single vnet port.
#
# Steps, in order: drop VLAN 1 as pvid/untagged, add every ID in $VLANS,
# then add the requested PVID as pvid/untagged, and finally log the
# resulting "bridge vlan show" output flattened onto one line.
#
# Globals:   BRIDGE, VLANS (read)
# Arguments: $1 — vnet interface name
#            $2 — VLAN ID to install as PVID
# Returns:   number of failed "bridge vlan add" calls (0 = success);
#            1 if the interface no longer exists
configure_vnet() {
    local port="$1" native_vid="$2"
    local failures=0
    local tagged_vid summary

    log "Configuring $port — bridge: $BRIDGE — VLANs: $VLANS — PVID: $native_vid"

    # Bail out early when the interface has gone away in the meantime.
    ip link show "$port" &>/dev/null || {
        err "$port: interface disappeared before configuration"
        return 1
    }

    # Best effort: the default PVID 1 entry may already be absent.
    bridge vlan del vid 1 dev "$port" pvid untagged 2>/dev/null

    # Tagged VLANs first.
    for tagged_vid in $VLANS; do
        if ! bridge vlan add vid "$tagged_vid" dev "$port" 2>/dev/null; then
            warn "$port: failed to add VLAN $tagged_vid"
            failures=$(( failures + 1 ))
            continue
        fi
        log "$port: added VLAN $tagged_vid"
    done

    # The PVID goes last, after the tagged VLANs are in place.
    if ! bridge vlan add vid "$native_vid" dev "$port" pvid untagged 2>/dev/null; then
        err "$port: failed to set PVID $native_vid"
        failures=$(( failures + 1 ))
    else
        log "$port: PVID set to $native_vid"
    fi

    # Newlines become spaces so the state fits in one syslog record.
    summary=$(bridge vlan show dev "$port" 2>/dev/null | tr '\n' ' ')
    log "$port: final state — $summary"

    return "$failures"
}

# Dispatch on the operation passed as $2. Only "started" does any work; every
# other operation is logged and ignored. The hook always exits 0; failures of
# the background worker are reported through syslog only.
case "$OPERATION" in
    started)
        # The work runs in a background subshell so the hook can return at
        # once. Its stdin/stdout/stderr are detached on purpose: everything
        # in here reports through logger, and a background child that keeps
        # the hook's inherited output pipes open can keep the caller
        # (libvirtd) waiting for EOF even after the hook itself has exited.
        (
            log "started — waiting for vnets on $BRIDGE"

            # Poll once per second until this guest's vnets appear on the
            # bridge, for at most VNET_WAIT_SECS attempts.
            vnets=()
            for (( i = 1; i <= VNET_WAIT_SECS; i++ )); do
                mapfile -t vnets < <(get_vm_vnets "$GUEST_NAME")
                (( ${#vnets[@]} > 0 )) && break
                log "Waiting for vnets... attempt $i/${VNET_WAIT_SECS}"
                sleep 1
            done

            if (( ${#vnets[@]} == 0 )); then
                err "No vnets found for $GUEST_NAME on $BRIDGE after ${VNET_WAIT_SECS}s — giving up"
                exit 1
            fi

            log "Found ${#vnets[@]} vnet(s): ${vnets[*]}"

            # Listed guests get PVID 100; everything else keeps PVID 1, with
            # a warning so an unlisted guest does not go unnoticed.
            if needs_pvid100 "$GUEST_NAME"; then
                pvid=100
            else
                pvid=1
                warn "$GUEST_NAME not in PVID100 list — using PVID 1, verify this is correct"
            fi

            # configure_vnet returns its own error count; accumulate it.
            total_errors=0
            for vnet in "${vnets[@]}"; do
                configure_vnet "$vnet" "$pvid" || (( total_errors += $? ))
            done

            if (( total_errors == 0 )); then
                log "Complete — ${#vnets[@]} interface(s) configured successfully"
            else
                err "Complete with $total_errors error(s) — check VLAN state manually"
                exit 1
            fi
        ) </dev/null >/dev/null 2>&1 &
        ;;
    begin)
        log "begin — no action"
        ;;
    stopped|reconnect)
        log "$OPERATION — no action (bridge cleans up vnets automatically)"
        ;;
    *)
        log "unhandled operation: $OPERATION"
        ;;
esac

exit 0

Installation

sudo cp libvirt-vlan-hook.sh /etc/libvirt/hooks/qemu
sudo chmod 755 /etc/libvirt/hooks/qemu
sudo systemctl restart libvirtd

Verification

# Watch hook execution for specific VM
journalctl -t "libvirt-hook[9800-WLC-02]" -f

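# Restart VM to trigger hook
# (virsh shutdown returns before the guest has powered off, so wait for the
#  "shut off" state instead of relying on a fixed sleep)
sudo virsh shutdown 9800-WLC-02
until sudo virsh domstate 9800-WLC-02 | grep -q 'shut off'; do sleep 1; done
sudo virsh start 9800-WLC-02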

# Wait and verify PVID
sleep 10
VNET=$(sudo virsh domiflist 9800-WLC-02 | awk '/br-mgmt/ {print $1}')
bridge vlan show dev $VNET

Expected output:

port    vlan-id
vnet11  10
        20
        30
        40
        100 PVID Egress Untagged
        110
        120

CLI Mastery: Bridge VLAN Commands

Diagnostic Commands

# Find vnet interface for a VM
sudo virsh domiflist <vm-name> | awk '/br-mgmt/ {print $1}'

# Check VLAN config on vnet
bridge vlan show dev vnetN

# One-liner: Find VM's vnet and show its VLANs
VNET=$(sudo virsh domiflist 9800-WLC-02 | awk '/br-mgmt/ {print $1}') && bridge vlan show dev $VNET

# Show all vnets on a bridge with their VLANs
for vnet in $(ip link show master br-mgmt | awk -F': ' '/vnet/{print $2}'); do
  echo "=== $vnet ==="
  bridge vlan show dev $vnet
done

VLAN Configuration

# Add VLANs to a vnet (non-persistent)
for vid in 10 20 30 40 100 110 120; do
  sudo bridge vlan add vid $vid dev vnetN
done

# Set PVID (native VLAN for untagged traffic)
sudo bridge vlan del vid 1 dev vnetN pvid untagged
sudo bridge vlan add vid 100 dev vnetN pvid untagged

# Remove a VLAN
sudo bridge vlan del vid 10 dev vnetN

Sysfs MAC Address Inspection

# Get vnet MAC address via sysfs
cat /sys/class/net/vnet0/address

# Get VM MAC from libvirt XML
grep -oP '(?<=<mac address=["'"'"'])[0-9a-f:]+' /etc/libvirt/qemu/9800-WLC-02.xml

# Match vnets to VMs via MAC suffix (everything after the first octet — the last 14 characters)
for vnet in $(ip link show master br-mgmt | awk -F': ' '/vnet/{print $2}'); do
  vnet_mac=$(cat /sys/class/net/$vnet/address)
  echo "$vnet: ${vnet_mac:3}"
done

Journald Filtering

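# Watch all hook activity
# NOTE: -t matches SYSLOG_IDENTIFIER exactly, so this form and the bracketed
# per-VM form below cannot both return results. Check which identifier
# journald actually recorded with:
#   journalctl -o verbose -n 20 | grep -E 'SYSLOG_(IDENTIFIER|PID)'
journalctl -t libvirt-hook -f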

# Filter by VM name
journalctl -t "libvirt-hook[vyos-01]" --since "1 hour ago"

# Show only errors
journalctl -t "libvirt-hook[9800-WLC-02]" -p err