Docker

Docker fundamentals and compose orchestration.

Image Management

# List images
docker images                                        # All images
docker images --format "{{.Repository}}:{{.Tag}}"    # Just name:tag
docker images --filter "dangling=true"               # Untagged images
docker images --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}"

# Pull images
docker pull nginx:latest
docker pull nginx:1.25-alpine                        # Specific tag
docker pull registry.example.com/myapp:v1.0          # Private registry
docker pull --platform linux/amd64 nginx             # Specific architecture

# Build images
docker build -t myapp:latest .                       # Current directory
docker build -t myapp:v1.0 -f Dockerfile.prod .      # Specific Dockerfile
docker build --no-cache -t myapp:latest .            # Force rebuild
docker build --build-arg VERSION=1.0 -t myapp .      # Build arguments
docker build --target builder -t myapp:build .       # Multi-stage target

# Tag images
docker tag myapp:latest myapp:v1.0
docker tag myapp:latest registry.example.com/myapp:v1.0

# Push images
docker login registry.example.com
docker push registry.example.com/myapp:v1.0

# Image inspection
docker inspect nginx:latest
docker inspect nginx:latest --format '{{.Config.Cmd}}'
docker inspect nginx:latest --format '{{.Config.ExposedPorts}}'
docker history nginx:latest                          # Layer history
docker history nginx:latest --no-trunc               # Full commands

# Save/load images (offline transfer)
docker save myapp:latest -o myapp.tar
docker load -i myapp.tar
docker save myapp:latest | gzip > myapp.tar.gz       # Compressed
zcat myapp.tar.gz | docker load                      # Load compressed

# Remove images
docker rmi nginx:latest
docker rmi $(docker images -q --filter "dangling=true")  # Remove dangling
docker image prune                                   # Remove dangling (untagged) images only
docker image prune -a                                # Remove all images not used by any container

# Search Docker Hub
docker search nginx
docker search --filter "is-official=true" nginx

Container Lifecycle

# Run containers
docker run nginx                                     # Foreground
docker run -d nginx                                  # Detached (background)
docker run -d --name webserver nginx                 # Named container
docker run -it ubuntu bash                           # Interactive TTY
docker run --rm nginx                                # Auto-remove when stopped

# Port mapping
docker run -d -p 8080:80 nginx                       # Host:Container
docker run -d -p 80:80 -p 443:443 nginx              # Multiple ports
docker run -d -p 127.0.0.1:8080:80 nginx             # Localhost only
docker run -d -P nginx                               # Random host ports

# Volume mounts
docker run -v /host/path:/container/path nginx       # Bind mount
docker run -v myvolume:/data nginx                   # Named volume
docker run -v /host/path:/container/path:ro nginx    # Read-only
docker run --mount type=bind,src=/host,dst=/container nginx

# Environment variables
docker run -e "DB_HOST=localhost" myapp
docker run -e "DB_HOST=localhost" -e "DB_PORT=5432" myapp
docker run --env-file .env myapp                     # From file

# Resource limits
docker run -m 512m nginx                             # Memory limit
docker run --cpus 0.5 nginx                          # CPU limit (half core)
docker run --memory 1g --memory-swap 2g nginx        # Memory + swap

# Networking
docker run --network host nginx                      # Host network
docker run --network mynetwork nginx                 # Custom network
docker run --dns 10.50.1.90 nginx                    # Custom DNS

# Security
# NOTE: the stock nginx image starts as root and writes to /var/cache/nginx
# and /var/run, so the hardening flags need the extras shown here.
docker run --read-only --tmpfs /var/cache/nginx --tmpfs /var/run nginx  # Read-only root filesystem
docker run --security-opt no-new-privileges nginx    # No privilege escalation
docker run --user 1000:1000 myapp                    # Run as specific user (image must support non-root)
# Drop everything, then add back only what nginx needs to bind :80, chown its
# cache directories and switch its workers to the nginx user.
docker run --cap-drop ALL --cap-add NET_BIND_SERVICE \
    --cap-add CHOWN --cap-add SETGID --cap-add SETUID nginx

# Container management
docker ps                                            # Running containers
docker ps -a                                         # All containers
docker ps -aq                                        # All container IDs
docker ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"

# Start/stop/restart
docker stop webserver
docker start webserver
docker restart webserver
docker pause webserver                               # Pause (freeze)
docker unpause webserver

# Logs
docker logs webserver
docker logs -f webserver                             # Follow
docker logs --tail 100 webserver                     # Last 100 lines
docker logs --since 1h webserver                     # Last hour
docker logs -f --timestamps webserver                # With timestamps

# Exec into container
docker exec -it webserver bash                       # Interactive shell
docker exec -it webserver sh                         # For alpine images
docker exec webserver cat /etc/nginx/nginx.conf      # Run command
docker exec -u root webserver apt update             # As root

# Copy files
docker cp file.txt webserver:/path/                  # Host to container
docker cp webserver:/path/file.txt ./                # Container to host

# Remove containers
docker rm webserver                                  # Remove stopped
docker rm -f webserver                               # Force remove running
docker container prune                               # Remove all stopped

Networking

# List networks
docker network ls
docker network inspect bridge

# Create networks
docker network create mynetwork                      # Default bridge
docker network create --driver bridge mynetwork
docker network create --subnet 172.18.0.0/16 mynetwork
docker network create --internal mynetwork           # No external access

# Connect containers
docker network connect mynetwork webserver
docker network disconnect mynetwork webserver
docker run -d --network mynetwork --name app1 nginx
docker run -d --network mynetwork --name app2 nginx
# app1 can reach app2 by name: curl http://app2

# Network modes
docker run --network bridge nginx                    # Default
docker run --network host nginx                      # Host networking
docker run --network none nginx                      # No networking
docker run --network container:webserver nginx       # Share with another

# DNS and aliases
docker run --network mynetwork --network-alias myalias nginx
# Can reach by both container name AND alias

# Port inspection
docker port webserver                                # Show port mappings
# Container IP on every attached network. The top-level
# .NetworkSettings.IPAddress is only filled in on the default bridge and is
# empty for user-defined networks such as mynetwork.
docker inspect webserver --format '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'

# Remove networks
docker network rm mynetwork
docker network prune                                 # Remove unused

Volumes and Storage

# List volumes
docker volume ls
docker volume inspect myvolume

# Create volumes
docker volume create myvolume
docker volume create --name myvolume --label project=myapp

# Use volumes
docker run -v myvolume:/data nginx                   # Named volume
docker run --mount source=myvolume,target=/data nginx

# Backup volume
docker run --rm -v myvolume:/data -v $(pwd):/backup alpine \
    tar cvf /backup/myvolume.tar /data

# Restore volume
docker run --rm -v myvolume:/data -v $(pwd):/backup alpine \
    tar xvf /backup/myvolume.tar -C /

# Volume drivers (for NFS, etc.)
docker volume create --driver local \
    --opt type=nfs \
    --opt o=addr=10.50.1.70,rw \
    --opt device=:/volume1/docker \
    nfs_volume

# Tmpfs mounts (in-memory)
docker run --tmpfs /tmp nginx
docker run --mount type=tmpfs,destination=/tmp,tmpfs-size=100m nginx

# Remove volumes
docker volume rm myvolume
docker volume prune                                  # Remove unused
docker volume prune -f                               # Force

# Dangling volumes (not connected to any container)
docker volume ls -f dangling=true

Dockerfile Best Practices

# Multi-stage build example (reduces final image size)
# Stage 1: Build
FROM golang:1.21-alpine AS builder
WORKDIR /app
# Copy only the module files first so the dependency download below is
# re-run only when go.mod/go.sum change, not on every source edit.
COPY go.mod go.sum ./
RUN go mod download
# Now bring in the rest of the source and compile with cgo disabled.
COPY . .
RUN CGO_ENABLED=0 go build -o myapp .

# Stage 2: Runtime
FROM alpine:3.19
RUN apk --no-cache add ca-certificates
WORKDIR /app
# Only the compiled binary is taken from the builder stage.
COPY --from=builder /app/myapp .
# Run as a non-root UID:GID.
USER 1000:1000
EXPOSE 8080
CMD ["./myapp"]
# Build with .dockerignore
cat > .dockerignore <<'EOF'
.git
.gitignore
*.md
Dockerfile*
docker-compose*
.env*
__pycache__
*.pyc
node_modules
.npm
EOF

# Build arguments
# In Dockerfile:
# ARG VERSION=latest
# LABEL version=$VERSION

docker build --build-arg VERSION=1.2.3 -t myapp:1.2.3 .

# Labels for metadata
docker build --label "maintainer=admin@example.com" \
    --label "version=1.0" \
    -t myapp:1.0 .

# Build cache
docker build --cache-from myapp:latest -t myapp:new .

# Squash layers (experimental)
docker build --squash -t myapp:squashed .

# Build for different platforms
# A multi-platform result cannot be loaded into the classic local image
# store, so build with a docker-container builder and push to a registry.
docker buildx create --name multiarch --use          # One-time builder setup
docker buildx build --platform linux/amd64,linux/arm64 \
    -t registry.example.com/myapp:multi --push .

Cleanup and Maintenance

# System overview
docker system df                                     # Disk usage
docker system df -v                                  # Detailed

# Aggressive cleanup
docker system prune                                  # Remove unused data
docker system prune -a                               # Remove ALL unused
docker system prune -a --volumes                     # Include volumes

# Selective cleanup
docker container prune                               # Stopped containers
docker image prune                                   # Dangling images
docker image prune -a                                # All unused images
docker volume prune                                  # Unused volumes
docker network prune                                 # Unused networks
docker builder prune                                 # Build cache

# Remove by age
docker image prune -a --filter "until=24h"           # Images older than 24h
docker container prune --filter "until=24h"

# Remove by label
docker container prune --filter "label=temporary"
docker image prune --filter "label=dev"

# Remove exited containers
docker rm $(docker ps -a -q -f status=exited)

# Stop all containers
docker stop $(docker ps -q)

# Remove all containers
docker rm -f $(docker ps -aq)

# Nuclear option (fresh start)
docker system prune -a --volumes -f
# WARNING: Deletes every stopped container and all unused images, networks,
# build cache and volumes without asking for confirmation (-f). Running
# containers and the resources they use are kept.
# NOTE(review): on recent Docker releases --volumes is documented as pruning
# anonymous volumes only - verify for your version.

Private Registry

# Run local registry
docker run -d -p 5000:5000 --name registry registry:2
docker run -d -p 5000:5000 \
    -v /data/registry:/var/lib/registry \
    --name registry registry:2

# Push to local registry
docker tag myapp:latest localhost:5000/myapp:latest
docker push localhost:5000/myapp:latest

# Pull from local registry
docker pull localhost:5000/myapp:latest

# Registry with TLS
docker run -d -p 5000:5000 \
    -v /certs:/certs \
    -e REGISTRY_HTTP_TLS_CERTIFICATE=/certs/server.crt \
    -e REGISTRY_HTTP_TLS_KEY=/certs/server.key \
    --name registry registry:2

# Login to registries
docker login                                         # Docker Hub
docker login registry.example.com                    # Private
docker login ghcr.io                                 # GitHub
docker login quay.io                                 # Quay

# Credential helpers
cat ~/.docker/config.json                            # Check config
# Use credential helpers instead of storing passwords

# List images in registry
curl -s http://localhost:5000/v2/_catalog | jq
curl -s http://localhost:5000/v2/myapp/tags/list | jq

Debugging and Troubleshooting

# Container inspection
docker inspect webserver
docker inspect webserver --format '{{.State.Status}}'
# IP per attached network (the top-level .NetworkSettings.IPAddress is empty
# for containers on user-defined networks)
docker inspect webserver --format '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}'
docker inspect webserver --format '{{json .Config.Env}}' | jq

# Process view
docker top webserver                                 # Running processes
docker stats                                         # Live resource usage
docker stats --no-stream                             # Single snapshot
docker stats --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}"

# Events
docker events                                        # Real-time events
docker events --since 1h                             # Last hour
docker events --filter 'event=start'                 # Filter by event

# Diff (changes since start)
docker diff webserver
# A = Added, C = Changed, D = Deleted

# Export container filesystem
docker export webserver > webserver.tar
docker export webserver | tar -tvf -                 # List contents

# Commit changes (create image from container)
docker commit webserver myapp:modified
# Not recommended for production - use Dockerfiles

# Debug failing container
docker run --rm -it myapp:broken sh                  # Override CMD (image ENTRYPOINT, if any, still runs)
docker run --rm -it --entrypoint sh myapp:broken    # Override ENTRYPOINT

# Check why container exited
docker inspect webserver --format '{{.State.ExitCode}}'
docker inspect webserver --format '{{.State.Error}}'
docker logs webserver 2>&1 | tail -50

# Network debugging
docker run --rm --network mynetwork nicolaka/netshoot
# Then use tcpdump, dig, curl, etc.

# Resource issues
docker inspect webserver --format '{{.HostConfig.Memory}}'
docker stats webserver --no-stream

Infrastructure Patterns

# Kroki diagram server (domus-docs)
docker compose -f /home/evanusmodestus/atelier/_bibliotheca/domus-docs/docker-compose.yml up -d
docker compose -f /home/evanusmodestus/atelier/_bibliotheca/domus-docs/docker-compose.yml down

# ISE container (ISEEE - ISE Evaluation Edition)
# Not available - ISE is VM-only

# Wazuh containers (k3s deployed, but can run standalone)
docker run -d --name wazuh-manager \
    -p 1514:1514/udp -p 1515:1515 -p 55000:55000 \
    wazuh/wazuh-manager:4.14.3

# Local DNS testing
docker run -d --name bind \
    -p 53:53/tcp -p 53:53/udp \
    -v /etc/bind:/etc/bind \
    internetsystemsconsortium/bind9:9.18

# Vault for testing
# Dev mode only: in-memory storage and a fixed root token. Uses the
# hashicorp/vault image (the "vault" library image is deprecated) and grants
# IPC_LOCK so Vault can lock memory.
docker run -d --name vault-dev \
    --cap-add IPC_LOCK \
    -p 8200:8200 \
    -e 'VAULT_DEV_ROOT_TOKEN_ID=root' \
    -e 'VAULT_DEV_LISTEN_ADDRESS=0.0.0.0:8200' \
    hashicorp/vault:latest

# PostgreSQL for testing
docker run -d --name postgres \
    -p 5432:5432 \
    -e POSTGRES_PASSWORD=testpass \
    -v pgdata:/var/lib/postgresql/data \
    postgres:15-alpine

# Redis for testing
docker run -d --name redis \
    -p 6379:6379 \
    redis:7-alpine

# Nginx reverse proxy
docker run -d --name nginx \
    -p 80:80 -p 443:443 \
    -v /etc/nginx/conf.d:/etc/nginx/conf.d:ro \
    -v /etc/ssl:/etc/ssl:ro \
    nginx:alpine

# Multi-container health check
# Prints one line per container: its health status, "no healthcheck (<state>)"
# when the container defines none, or "not found" when it does not exist.
SERVICES="kroki-core kroki-mermaid kroki-d2"
HEALTH_FMT='{{if .State.Health}}{{.State.Health.Status}}{{else}}no healthcheck ({{.State.Status}}){{end}}'
for svc in $SERVICES; do
    # Only a failing inspect (missing container) maps to "not found".
    status=$(docker inspect "$svc" --format "$HEALTH_FMT" 2>/dev/null) || status="not found"
    printf "%-20s %s\n" "$svc" "$status"
done

Common Gotchas

# WRONG: Using :latest tag in production
docker run myapp:latest                              # Unpredictable version

# CORRECT: Use specific tags
docker run myapp:1.2.3

# WRONG: Running as root (default)
docker run nginx                                     # Runs as root

# CORRECT: Use non-root user
# (the image must be built to run unprivileged; the stock nginx image cannot
# start under an arbitrary UID)
docker run --user 1000:1000 myapp:1.2.3
# Or in Dockerfile: USER 1000:1000

# WRONG: Storing secrets in images
# Dockerfile:
# ENV DB_PASSWORD=secret123                          # Visible in docker history!

# CORRECT: Use secrets at runtime
docker run -e DB_PASSWORD="$DB_PASSWORD" myapp       # From env
docker run --env-file .env myapp                     # From file

# WRONG: Not cleaning up apt cache
# Dockerfile:
# RUN apt update && apt install -y curl              # Cache left behind

# CORRECT: Clean in same layer
# RUN apt update && apt install -y curl && rm -rf /var/lib/apt/lists/*

# WRONG: COPY before dependency install (breaks cache)
# Dockerfile:
# COPY . .
# RUN npm install                                    # Reinstalls on any file change

# CORRECT: Copy dependency files first
# COPY package*.json ./
# RUN npm install
# COPY . .                                           # Only code changes invalidate

# WRONG: No health check
docker run nginx                                     # No way to know if healthy

# CORRECT: Add health check
docker run --health-cmd="curl -f http://localhost/ || exit 1" \
    --health-interval=30s nginx

# WRONG: Mounting sensitive host directories
docker run -v /:/host:rw myapp                       # Full host access!

# CORRECT: Mount only what's needed, read-only if possible
docker run -v /data:/data:ro myapp

Quick Reference

# Images
docker images                   # List images
docker pull image:tag           # Download
docker build -t name .          # Build
docker push image:tag           # Upload
docker rmi image                # Remove

# Containers
docker run -d --name X image    # Run detached
docker ps -a                    # List all
docker logs -f container        # Follow logs
docker exec -it container bash  # Shell access
docker stop/start container     # Control
docker rm container             # Remove

# Common run options
-d                              # Detach (background)
--name NAME                     # Container name
-p HOST:CONTAINER               # Port mapping
-v HOST:CONTAINER               # Volume mount
-e VAR=value                    # Environment
--rm                            # Auto-remove
-it                             # Interactive TTY

# Volumes
docker volume ls                # List volumes
docker volume create NAME       # Create
docker volume rm NAME           # Remove

# Networks
docker network ls               # List networks
docker network create NAME      # Create
docker network connect NET CTR  # Connect container

# Cleanup
docker system prune -a          # Remove all unused
docker container prune          # Remove stopped
docker image prune -a           # Remove unused images
docker volume prune             # Remove unused volumes

# Debug
docker inspect container        # Full details
docker stats                    # Resource usage
docker logs --tail 100 ctr      # Recent logs

Docker Compose Basics

# Start services (detached)
docker compose up -d

# Start with build
docker compose up -d --build

# Start specific services only
docker compose up -d postgres redis

# Stop and remove containers
docker compose down

# Stop, remove containers AND volumes (DESTRUCTIVE)
docker compose down -v

# Stop, remove everything including images
docker compose down --rmi all -v

WRONG: Using docker-compose (hyphen) - the standalone v1 binary, deprecated.
CORRECT: Using docker compose (space) - v2 plugin syntax.

Logs and Monitoring

# Follow all logs
docker compose logs -f

# Follow specific service
docker compose logs -f postgres

# Last 100 lines with timestamps
docker compose logs --tail 100 -t

# Since specific time
docker compose logs --since 30m
docker compose logs --since "2024-01-01T10:00:00"

# Multiple services
docker compose logs -f app worker scheduler

Service Status

# Container status
docker compose ps

# Include stopped
docker compose ps -a

# JSON output for scripting
# Newer Compose releases print one JSON object per line, older ones a single
# array; the type check lets the same filter handle both.
docker compose ps --format json | jq 'if type == "array" then .[] else . end | .Name'

# Show ports only
docker compose ps --format "table {{.Name}}\t{{.Ports}}"

# Service health (Health is an empty string when no healthcheck is defined)
docker compose ps --format json | jq -r 'if type == "array" then .[] else . end
    | "\(.Name): \(if (.Health // "") == "" then "N/A" else .Health end)"'

Execute Commands

# Interactive shell
docker compose exec postgres psql -U admin

# Run command (not interactive)
docker compose exec -T postgres pg_dump -U admin mydb > backup.sql

# As specific user
docker compose exec -u root app bash

# In specific working directory
docker compose exec -w /app app ls -la

# With environment variables
docker compose exec -e DEBUG=1 app python manage.py shell

One-off Commands (run vs exec)

# run = new container, exec = existing container

# Run one-off command (creates new container)
docker compose run --rm app python manage.py migrate

# Run with no dependencies started
docker compose run --no-deps --rm app pytest

# Run with specific entrypoint
docker compose run --entrypoint /bin/sh app

# Run with port mapping
docker compose run -p 8000:8000 app python manage.py runserver 0.0.0.0:8000

GOTCHA: run starts dependencies by default. Use --no-deps for isolated commands.

Building Images

# Build all services
docker compose build

# Build without cache
docker compose build --no-cache

# Build specific service
docker compose build app

# Build with build args
docker compose build --build-arg VERSION=1.0.0 app

# Pull base images before build
docker compose build --pull

# Parallel build (faster)
docker compose build --parallel

Image Management

# Pull latest images
docker compose pull

# Pull specific service
docker compose pull postgres

# Pull in parallel
docker compose pull --parallel

# List images used by the project's containers
docker compose images

# Push built images (requires registry)
docker compose push

Scaling Services

# Scale workers
docker compose up -d --scale worker=3

# Scale multiple services
docker compose up -d --scale worker=3 --scale scheduler=2

# Scale down
docker compose up -d --scale worker=1

# View scaled containers
docker compose ps | grep worker

GOTCHA: Scaled replicas cannot all bind the same fixed host port. Use expose instead of ports (or publish a host port range), or put a load balancer in front.

Profiles (Conditional Services)

# docker-compose.yml
services:
  # No profiles key: part of the default set started by a plain "up"
  app:
    image: myapp:latest

  # Assigned to the "debug" profile only
  debug:
    image: debug-tools
    profiles:
      - debug

  # Assigned to two profiles: "monitoring" and "production"
  monitoring:
    image: prometheus
    profiles:
      - monitoring
      - production
# Start default services only (no profile)
docker compose up -d

# Start with debug profile
docker compose --profile debug up -d

# Multiple profiles
docker compose --profile debug --profile monitoring up -d

# List services in profile
docker compose --profile debug config --services

Environment Variables

# Environment file (default: .env)
docker compose up -d

# Custom env file
docker compose --env-file .env.production up -d

# Override variable
POSTGRES_PASSWORD=secret docker compose up -d

# View resolved config
docker compose config

# Check variable interpolation
docker compose config | grep -A5 environment:
# docker-compose.yml - Variable patterns
# Four ways to set a container environment variable; the ${...} forms are
# interpolated from the shell environment or the env file.
services:
  app:
    environment:
      # Direct value
      - DEBUG=false
      # From shell/env file
      - DATABASE_URL=${DATABASE_URL}
      # With default
      - LOG_LEVEL=${LOG_LEVEL:-INFO}
      # Required (fails if missing)
      - API_KEY=${API_KEY:?API_KEY is required}

Override Files

# Default merge order:
# 1. docker-compose.yml
# 2. docker-compose.override.yml (if exists)

# Explicit override files
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d

# Development override
docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d

# Preview merged config
docker compose -f docker-compose.yml -f docker-compose.prod.yml config
# NOTE: two separate files are shown below, one after the other.
# docker-compose.override.yml (development)
services:
  app:
    # Build the image from the local directory
    build: .
    volumes:
      - .:/app  # Hot reload
    environment:
      - DEBUG=true
    ports:
      - "8000:8000"

# docker-compose.prod.yml
services:
  app:
    # Image tag comes from the VERSION variable
    image: registry.example.com/app:${VERSION}
    restart: always
    deploy:
      replicas: 3

Service Dependencies

# app is started only after postgres reports healthy and redis has started.
services:
  app:
    depends_on:
      postgres:
        # Wait for the postgres healthcheck below to pass
        condition: service_healthy
      redis:
        # Only wait for the redis container to be started
        condition: service_started

  postgres:
    image: postgres:16
    healthcheck:
      # Probe with pg_isready every 5s, 5s timeout, up to 5 retries
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 5s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7

GOTCHA: By default (short list syntax, or condition: service_started) depends_on only waits for the container to start, NOT for the application to be ready. Use condition: service_healthy together with a healthcheck for proper sequencing.

Networks

# Two-tier layout: backend is the only service attached to both networks.
services:
  # Attached to the frontend network only
  frontend:
    networks:
      - frontend

  # Attached to both networks
  backend:
    networks:
      - frontend
      - backend

  # Attached to the backend network only
  database:
    networks:
      - backend

networks:
  frontend:
    driver: bridge
  backend:
    driver: bridge
    internal: true  # No external access
# List compose networks
docker network ls | grep myproject

# Inspect network
docker network inspect myproject_backend

# Connect existing container to compose network
docker network connect myproject_backend external-container

Volumes

# The three volume forms side by side. Each entry needs its own container
# path: mounting two volumes on the same target is rejected as a duplicate
# mount point.
services:
  postgres:
    image: postgres:16
    volumes:
      # Named volume (persistent)
      - postgres_data:/var/lib/postgresql/data
      # Bind mount (development)
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
      # Anonymous volume (container-managed)
      - /var/log/postgresql

volumes:
  postgres_data:
    driver: local
    # Optional: NFS mount (delete driver_opts for a plain local volume)
    driver_opts:
      type: nfs
      o: addr=nas-01.inside.domusdigitalis.dev,rw
      device: ":/volume1/docker/postgres"
# List compose volumes
docker volume ls | grep myproject

# Backup volume
docker run --rm -v myproject_postgres_data:/data -v $(pwd):/backup \
  alpine tar czf /backup/postgres_backup.tar.gz /data

# Restore volume
docker run --rm -v myproject_postgres_data:/data -v $(pwd):/backup \
  alpine tar xzf /backup/postgres_backup.tar.gz -C /

Infrastructure: Kroki Diagram Server

# /home/evanusmodestus/atelier/_bibliotheca/domus-docs/docker-compose.yml
# Kroki diagram rendering for Antora builds
#
# The kroki gateway renders most diagram types itself and forwards mermaid,
# bpmn and excalidraw requests to the companion containers. It must be told
# their hostnames via KROKI_*_HOST, otherwise it looks for them on localhost.

services:
  kroki:
    image: yuzutech/kroki:latest
    ports:
      - "127.0.0.1:8000:8000"  # Localhost only
    environment:
      - KROKI_SAFE_MODE=unsafe
      - KROKI_MAX_URI_LENGTH=8192
      # Companion service names on the compose network
      - KROKI_MERMAID_HOST=kroki-mermaid
      - KROKI_BPMN_HOST=kroki-bpmn
      - KROKI_EXCALIDRAW_HOST=kroki-excalidraw
    depends_on:
      - kroki-mermaid
      - kroki-bpmn
      - kroki-excalidraw
    security_opt:
      - no-new-privileges:true
    restart: unless-stopped

  kroki-mermaid:
    image: yuzutech/kroki-mermaid:latest
    expose:
      - "8002"
    security_opt:
      - no-new-privileges:true

  kroki-bpmn:
    image: yuzutech/kroki-bpmn:latest
    expose:
      - "8003"
    security_opt:
      - no-new-privileges:true

  kroki-excalidraw:
    image: yuzutech/kroki-excalidraw:latest
    expose:
      - "8004"
    security_opt:
      - no-new-privileges:true
# Start Kroki for Antora build
cd /home/evanusmodestus/atelier/_bibliotheca/domus-docs
docker compose up -d

# Verify all services running
docker compose ps

# Test diagram rendering
curl -s http://localhost:8000/d2/svg -d 'x -> y' | head -5

# Stop after build
docker compose down

Infrastructure: Monitoring Stack

# Prometheus + Grafana monitoring stack
# GRAFANA_PASSWORD must be set in the shell or env file; Compose aborts with
# the message below when it is missing instead of passing an empty password.

services:
  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration (read-only) and persistent TSDB storage
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.retention.time=30d'
    restart: unless-stopped

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Required (fails if missing)
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:?GRAFANA_PASSWORD is required}
      - GF_INSTALL_PLUGINS=grafana-clock-panel
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
    depends_on:
      - prometheus
    restart: unless-stopped

  alertmanager:
    image: prom/alertmanager:latest
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    restart: unless-stopped

  node-exporter:
    image: prom/node-exporter:latest
    ports:
      - "9100:9100"
    volumes:
      # Host /proc and /sys, mounted read-only for the exporter to read
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
    restart: unless-stopped

volumes:
  prometheus_data:
  grafana_data:

Development Environment Pattern

# Full development stack with hot reload
# app (built from the local "development" target) plus postgres and redis;
# mailhog is only started when the "debug" profile is enabled.

services:
  app:
    build:
      context: .
      target: development
    volumes:
      # Source tree bind-mounted over /app; node_modules lives in its own
      # named volume so the bind mount does not hide it
      - .:/app
      - node_modules:/app/node_modules
    ports:
      - "3000:3000"
      - "9229:9229"  # Node debugger
    environment:
      - NODE_ENV=development
      # Hostnames are the compose service names defined below
      - DATABASE_URL=postgres://dev:dev@postgres:5432/dev
      - REDIS_URL=redis://redis:6379
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_started
    command: npm run dev

  postgres:
    image: postgres:16
    environment:
      # Credentials match DATABASE_URL in the app service
      - POSTGRES_USER=dev
      - POSTGRES_PASSWORD=dev
      - POSTGRES_DB=dev
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    healthcheck:
      # Gate for the app's service_healthy condition
      test: ["CMD-SHELL", "pg_isready -U dev"]
      interval: 5s
      timeout: 5s
      retries: 5

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  mailhog:
    image: mailhog/mailhog
    ports:
      - "1025:1025"
      - "8025:8025"
    profiles:
      - debug

volumes:
  postgres_data:
  redis_data:
  node_modules:  # Preserve node_modules across rebuilds

Infrastructure: Vault Development

# HashiCorp Vault for local development/testing
# Dev mode: in-memory storage, fixed root token, plain HTTP listener.

services:
  vault:
    image: hashicorp/vault:latest
    ports:
      - "8200:8200"
    environment:
      - VAULT_DEV_ROOT_TOKEN_ID=root
      - VAULT_DEV_LISTEN_ADDRESS=0.0.0.0:8200
      # The CLI defaults to https://127.0.0.1:8200; the dev listener speaks
      # plain HTTP, so without this the healthcheck below can never succeed.
      - VAULT_ADDR=http://127.0.0.1:8200
    cap_add:
      - IPC_LOCK
    healthcheck:
      test: ["CMD", "vault", "status"]
      interval: 10s
      timeout: 5s
      retries: 3
# Start Vault dev server
docker compose up -d vault

# Configure with Vault CLI
export VAULT_ADDR='http://127.0.0.1:8200'
export VAULT_TOKEN='root'

# Enable KV secrets
# Dev mode already mounts a KV v2 engine at secret/, so enabling that path
# again fails with "path is already in use". Confirm the mount instead; on a
# non-dev server run: vault secrets enable -path=secret kv-v2
vault secrets list

# Test secret
vault kv put secret/myapp username=admin password=secret
vault kv get -format=json secret/myapp | jq '.data.data'

Troubleshooting

# Validate compose file
docker compose config

# Check why service won't start
docker compose logs failing-service

# Get container events
docker compose events

# Running processes in each service container
docker compose top

# Inspect service configuration
docker compose config --services
docker compose config --volumes
docker compose config --images

# Force recreate containers
docker compose up -d --force-recreate

# Recreate specific service
docker compose up -d --force-recreate app

# Reset everything
docker compose down -v --rmi all
docker compose up -d --build

Common Gotchas

# WRONG: Using deprecated version field
# version: '3.8'  # Deprecated, remove this line

# WRONG: Relative paths in different directory
docker compose -f /other/path/docker-compose.yml up -d
# Paths are relative to compose file location, not current directory

# CORRECT: Change to compose file directory
cd /other/path && docker compose up -d

# WRONG: Assuming network DNS works immediately
docker compose up -d
docker compose exec app curl http://postgres:5432  # May fail

# CORRECT: Use depends_on with healthcheck
# See depends_on section above

# WRONG: Bind mount overwrites container files
volumes:
  - ./app:/app  # Overwrites /app including node_modules!

# CORRECT: Use anonymous volume for container-managed dirs
volumes:
  - ./app:/app
  - /app/node_modules  # Preserved from image

# WRONG: Hardcoding secrets in compose file
environment:
  - DATABASE_PASSWORD=supersecret

# CORRECT: Use env file or secrets
env_file:
  - .env.local  # Not committed to git

Production Best Practices

# Hardened production service definition.
services:
  app:
    # Pinned release: deployment fails if VERSION is not set rather than
    # silently falling back to an unpredictable "latest" tag
    image: registry.example.com/app:${VERSION:?VERSION is required}

    # Restart whenever the container stops, including after a daemon restart
    restart: always

    # Resource limits
    deploy:
      resources:
        limits:
          cpus: '2'
          memory: 2G
        reservations:
          cpus: '0.5'
          memory: 512M

    # Health check (curl must exist in the image)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

    # Security: read-only root filesystem with a writable in-memory /tmp
    security_opt:
      - no-new-privileges:true
    read_only: true
    tmpfs:
      - /tmp

    # Logging: rotate at 10 MB, keep 3 files
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
# Production deployment
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d

# Update one service without touching its dependencies
# (the container is recreated, so expect a brief interruption; real
# zero-downtime needs several replicas behind a load balancer)
docker compose pull
docker compose up -d --no-deps --build app

# Update and block until the healthcheck passes, then probe the endpoint
docker compose up -d --no-deps --wait app
docker compose exec app curl -f http://localhost:8000/health