systemd & journalctl

Service management, units, and log analysis with journalctl.

Service Management

# BASIC SERVICE CONTROL
systemctl status sshd                    # Service status
systemctl start nginx                    # Start service
systemctl stop nginx                     # Stop service
systemctl restart nginx                  # Stop + start
systemctl reload nginx                   # Reload config (no downtime)
systemctl reload-or-restart nginx        # Reload if supported, else restart

# ENABLE/DISABLE (boot behavior)
systemctl enable nginx                   # Start on boot
systemctl disable nginx                  # Don't start on boot
systemctl enable --now nginx             # Enable AND start
systemctl is-enabled nginx               # Check boot status

# MASK/UNMASK (prevent any start)
systemctl mask bluetooth                 # Prevent start entirely
systemctl unmask bluetooth               # Allow start again
# Masking links unit to /dev/null

# LIST SERVICES
systemctl list-units --type=service      # Active or failed services (loaded units, not only running)
systemctl list-units --type=service --all # Also inactive loaded services
systemctl list-unit-files --type=service # All installed

# SERVICE INFO
systemctl show nginx                     # All properties
systemctl show nginx -p MainPID          # Specific property
systemctl cat nginx                      # Show unit file
systemctl edit nginx                     # Edit override file

# DEPENDENCIES
systemctl list-dependencies nginx        # What nginx needs
systemctl list-dependencies --reverse nginx  # What needs nginx

# FAILED SERVICES
systemctl --failed                       # Show failed units
systemctl reset-failed                   # Clear failed status
systemctl reset-failed nginx             # Clear specific

# INFRASTRUCTURE: Quick service check
# Prints one line per unit: name, active state, and (in parentheses) the
# boot-enablement state. stderr is discarded on purpose so unknown units do
# not clutter the table; when systemctl yields no stdout at all the column
# falls back to "unknown" instead of being printed empty.
for svc in sshd vault nginx postgresql; do
    status=$(systemctl is-active "$svc" 2>/dev/null)
    enabled=$(systemctl is-enabled "$svc" 2>/dev/null)
    printf "%-15s %s (%s)\n" "$svc" "${status:=unknown}" "${enabled:=unknown}"
done

Journalctl (Logs)

# BASIC LOG VIEWING
journalctl                               # All logs (huge!)
journalctl -f                            # Follow (like tail -f)
journalctl -n 100                        # Last 100 lines
journalctl -r                            # Reverse (newest first)

# FILTER BY UNIT
journalctl -u nginx                      # Single service
journalctl -u nginx -u postgresql        # Multiple services
journalctl -u nginx --since "1 hour ago"

# TIME FILTERS
journalctl --since "2024-01-15"          # Since date
journalctl --since "09:00" --until "10:00"  # Time range
journalctl --since "1 hour ago"          # Relative
journalctl --since "yesterday"
journalctl -b                            # Current boot only
journalctl -b -1                         # Previous boot

# PRIORITY FILTERS
journalctl -p err                        # Errors and above
journalctl -p warning                    # Warnings and above
journalctl -p debug                      # Everything (verbose)

# PRIORITY LEVELS:
# 0=emerg, 1=alert, 2=crit, 3=err, 4=warning, 5=notice, 6=info, 7=debug

# FIELD FILTERS
journalctl _PID=1234                     # By PID
journalctl _UID=1000                     # By user ID
journalctl _COMM=sshd                    # By command name
journalctl _EXE=/usr/sbin/sshd           # By executable
journalctl SYSLOG_FACILITY=10            # By facility

# KERNEL LOGS
journalctl -k                            # Kernel messages (dmesg)
journalctl -k -p err                     # Kernel errors

# OUTPUT FORMATS
journalctl -o verbose                    # All fields
journalctl -o json                       # JSON format
journalctl -o json-pretty                # Pretty JSON
journalctl -o short-iso                  # ISO timestamps
journalctl -o cat                        # Message only

# JSON PARSING
# Print up to 10 nginx messages from the last hour at warning level or worse.
# PRIORITY is exported as a string ("0".."7"), so convert it to a number for
# a numeric comparison. Entries without a PRIORITY field are treated as
# debug (7) and dropped; with a plain string comparison they would match,
# because jq sorts null before every string.
journalctl -u nginx -o json --since "1 hour ago" | \
    jq -r 'select((.PRIORITY // "7" | tonumber) <= 4) | .MESSAGE' | head -10

# DISK USAGE
journalctl --disk-usage                  # How much space used
journalctl --vacuum-size=500M            # Reduce to 500MB
journalctl --vacuum-time=7d              # Keep only 7 days
journalctl --vacuum-files=5              # Keep 5 archive files

# PERSISTENT CONFIGURATION
# /etc/systemd/journald.conf
# Storage=persistent
# SystemMaxUse=500M
# MaxRetentionSec=7d

# INFRASTRUCTURE: Log analysis patterns
# SSH failures (-E so the alternation does not depend on GNU's BRE "\|")
journalctl -u sshd --since "today" | grep -iE "failed|invalid"

# Service restarts
journalctl -u nginx --since "1 week ago" | grep -E "Start|Stop|Reload"

# Multi-host log check
# -n 5 trims the output on the remote side, so only the five newest entries
# cross the SSH link instead of the whole hour being piped into a local tail.
for host in vault-01 bind-01 ise-01; do
    echo "=== $host (errors last hour) ==="
    ssh "$host" "journalctl -p err --since '1 hour ago' -n 5 --no-pager 2>/dev/null"
done

Custom Unit Files

# UNIT FILE LOCATIONS
# /usr/lib/systemd/system/  - Package-installed (don't edit)
# /etc/systemd/system/      - Admin-created (takes precedence)
# ~/.config/systemd/user/   - User units

# BASIC SERVICE UNIT
cat > /etc/systemd/system/myapp.service <<'EOF'
[Unit]
Description=My Application Server
Documentation=https://example.com/docs
After=network.target postgresql.service
Requires=postgresql.service

[Service]
Type=simple
User=appuser
Group=appgroup
WorkingDirectory=/opt/myapp
Environment="NODE_ENV=production"
EnvironmentFile=/opt/myapp/.env
ExecStart=/opt/myapp/bin/server
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
RestartSec=5
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable --now myapp

# SERVICE TYPES
# Type=simple   - ExecStart is main process (default)
# Type=forking  - Forks to background (traditional daemons)
# Type=oneshot  - Runs once then exits (scripts)
# Type=notify   - Sends sd_notify() when ready
# Type=idle     - Waits for other jobs to finish

# FORKING SERVICE (traditional daemon)
[Service]
Type=forking
PIDFile=/run/myapp/myapp.pid
ExecStart=/opt/myapp/bin/start.sh
ExecStop=/opt/myapp/bin/stop.sh

# ONESHOT (initialization script)
[Service]
Type=oneshot
ExecStart=/usr/local/bin/init-script.sh
RemainAfterExit=yes
# RemainAfterExit=yes means service shows "active" after completion

# RESTART BEHAVIOR
# Comments sit on their own lines: unit files do not support trailing
# comments, the text after "#" would become part of the value.
# The Restart= lines are alternatives - pick one.
[Service]
# Always restart, whatever the exit reason
Restart=always
# Restart on non-zero exit, signal, timeout or watchdog
Restart=on-failure
# Restart on signal, timeout or watchdog (not on plain exit codes)
Restart=on-abnormal
# Wait 5 seconds before restarting
RestartSec=5
# Don't restart on these exit codes
RestartPreventExitStatus=0 1 2

# Start rate limiting is configured in [Unit], not [Service]
[Unit]
# Max 5 starts ...
StartLimitBurst=5
# ... within 60 seconds
StartLimitIntervalSec=60

# ENVIRONMENT VARIABLES
Environment="VAR1=value1" "VAR2=value2"
EnvironmentFile=/path/to/envfile         # Load from file
EnvironmentFile=-/path/to/optional       # - means optional

# SECURITY HARDENING
[Service]
NoNewPrivileges=yes
PrivateTmp=yes
PrivateDevices=yes
ProtectSystem=strict
ProtectHome=yes
ReadWritePaths=/var/lib/myapp
CapabilityBoundingSet=CAP_NET_BIND_SERVICE

# RESOURCE LIMITS
[Service]
CPUQuota=50%
MemoryMax=512M
MemoryHigh=384M
LimitNOFILE=65536
LimitNPROC=4096

Systemd Timers (Cron Replacement)

# TIMER + SERVICE PAIR
# For timer "backup.timer", systemd runs "backup.service"

# backup.timer
cat > /etc/systemd/system/backup.timer <<'EOF'
[Unit]
Description=Daily backup timer

[Timer]
OnCalendar=daily
# Or more specific:
# OnCalendar=*-*-* 02:30:00        # Every day at 2:30 AM
# OnCalendar=Mon *-*-* 03:00:00    # Every Monday at 3 AM
# OnCalendar=*-*-01 04:00:00       # First of month at 4 AM

Persistent=true
# Persistent=true runs missed runs on boot

RandomizedDelaySec=1800
# Add random delay up to 30 minutes (prevents thundering herd)

[Install]
WantedBy=timers.target
EOF

# backup.service
cat > /etc/systemd/system/backup.service <<'EOF'
[Unit]
Description=Backup service

[Service]
Type=oneshot
ExecStart=/usr/local/bin/backup.sh
User=backup
Nice=19
IOSchedulingClass=idle
EOF

systemctl daemon-reload
systemctl enable --now backup.timer

# LIST TIMERS
systemctl list-timers                    # Active timers
systemctl list-timers --all              # All timers

# OUTPUT SHOWS:
# NEXT                         LEFT          LAST                         PASSED       UNIT
# Mon 2024-01-15 03:00:00 EST  6h left       Sun 2024-01-14 03:00:00 EST  18h ago      backup.timer

# ONCALENDAR SYNTAX
# Examples:
# daily                        = *-*-* 00:00:00
# weekly                       = Mon *-*-* 00:00:00
# monthly                      = *-*-01 00:00:00
# *:0/15                       = Every 15 minutes
# *-*-* 02:00:00               = Every day at 2 AM
# Mon,Wed,Fri *-*-* 10:00:00   = M/W/F at 10 AM
# *-*-1,15 03:00:00            = 1st and 15th of month

# TEST CALENDAR SPECIFICATION
systemd-analyze calendar "Mon *-*-* 03:00:00"
systemd-analyze calendar "daily" --iterations=5

# MONOTONIC TIMERS (relative to events)
# Comments sit on their own lines: unit files do not support trailing
# comments, the text after "#" would be parsed as part of the time span.
[Timer]
# 5 min after boot
OnBootSec=5min
# 10 min after systemd start
OnStartupSec=10min
# 1 hour after timer activation
OnActiveSec=1h
# 1 hour after service last ran
OnUnitActiveSec=1h
# 30 min after service stopped
OnUnitInactiveSec=30min

# INFRASTRUCTURE: Cert renewal timer
cat > /etc/systemd/system/vault-cert-renew.timer <<'EOF'
[Unit]
Description=Renew Vault certificates weekly

[Timer]
OnCalendar=Sun *-*-* 02:00:00
Persistent=true
RandomizedDelaySec=1h

[Install]
WantedBy=timers.target
EOF

cat > /etc/systemd/system/vault-cert-renew.service <<'EOF'
[Unit]
Description=Renew Vault TLS certificate

[Service]
Type=oneshot
ExecStart=/usr/local/bin/vault-cert-renew.sh
User=root
EOF

# RUN TIMER MANUALLY (for testing)
systemctl start backup.service           # Run now (not timer)

# CHECK LAST RUN
journalctl -u backup.service -n 20

Targets & Boot

# TARGETS (like runlevels)
# graphical.target  = GUI (runlevel 5)
# multi-user.target = CLI (runlevel 3)
# rescue.target     = Single user (runlevel 1)
# emergency.target  = Minimal boot

# GET/SET DEFAULT TARGET
systemctl get-default
systemctl set-default multi-user.target  # Boot to CLI
systemctl set-default graphical.target   # Boot to GUI

# SWITCH TARGET NOW
systemctl isolate multi-user.target      # Switch to CLI
systemctl isolate rescue.target          # Single user mode

# LIST TARGETS
systemctl list-units --type=target
systemctl list-dependencies multi-user.target

# BOOT ANALYSIS
systemd-analyze                          # Boot time summary
systemd-analyze blame                    # Time per service
systemd-analyze critical-chain           # Critical path
systemd-analyze plot > boot.svg          # Visual graph

# SLOW BOOT DEBUG
systemd-analyze blame | head -20         # Slowest services
systemd-analyze critical-chain nginx     # What delayed nginx

# BOOT MESSAGES
journalctl -b                            # Current boot
journalctl -b -1                         # Previous boot
journalctl --list-boots                  # All recorded boots

# REBOOT/SHUTDOWN
systemctl reboot                         # Reboot
systemctl poweroff                       # Shutdown
systemctl halt                           # Halt (no poweroff)
systemctl suspend                        # Suspend to RAM
systemctl hibernate                      # Suspend to disk

# SCHEDULED SHUTDOWN
shutdown -r +10 "Rebooting in 10 minutes"
shutdown -c                              # Cancel

User Units (--user)

# USER SYSTEMD
# Runs per-user, doesn't require root
# Units in ~/.config/systemd/user/

# CREATE USER SERVICE
mkdir -p ~/.config/systemd/user/

cat > ~/.config/systemd/user/myapp.service <<'EOF'
[Unit]
Description=My User Application

[Service]
ExecStart=/home/user/bin/myapp
Restart=always
RestartSec=5

[Install]
WantedBy=default.target
EOF

# MANAGE USER UNITS
systemctl --user daemon-reload
systemctl --user enable --now myapp
systemctl --user status myapp
systemctl --user stop myapp

# USER LOGS
journalctl --user -u myapp
journalctl --user -u myapp -f

# ENABLE LINGERING (run without login)
# By default, user services stop on logout
loginctl enable-linger username          # Keep services running
loginctl disable-linger username         # Stop on logout

# CHECK LINGERING
loginctl show-user username | grep Linger

# INFRASTRUCTURE: k8s port forward as user service
# %h expands to the home directory of the user running the service manager,
# so the unit works for any account without a hard-coded /home/<name> path.
# NOTE: --address 0.0.0.0 exposes the forwarded port on every interface;
# use 127.0.0.1 when only local access is needed.
mkdir -p ~/.config/systemd/user/
cat > ~/.config/systemd/user/wazuh-dashboard-pf.service <<'EOF'
[Unit]
Description=Wazuh Dashboard Port Forward
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
ExecStart=/usr/bin/kubectl -n wazuh port-forward service/wazuh-dashboard 8443:443 --address 0.0.0.0
Restart=always
RestartSec=10
Environment="KUBECONFIG=%h/.kube/config"

[Install]
WantedBy=default.target
EOF

systemctl --user daemon-reload
systemctl --user enable --now wazuh-dashboard-pf

# USER TIMER EXAMPLE
cat > ~/.config/systemd/user/sync.timer <<'EOF'
[Unit]
Description=Sync timer

[Timer]
OnBootSec=5min
OnUnitActiveSec=30min

[Install]
WantedBy=timers.target
EOF

cat > ~/.config/systemd/user/sync.service <<'EOF'
[Unit]
Description=Sync files

[Service]
Type=oneshot
ExecStart=/home/user/bin/sync.sh
EOF

systemctl --user enable --now sync.timer

Override Files (Drop-ins)

# OVERRIDE PACKAGE UNITS WITHOUT MODIFYING THEM
# Creates /etc/systemd/system/<unit>.service.d/override.conf

# INTERACTIVE EDIT (creates drop-in)
systemctl edit nginx                     # Opens editor
# Saves to /etc/systemd/system/nginx.service.d/override.conf

# FULL UNIT REPLACEMENT
systemctl edit --full nginx              # Edit full copy
# Saves to /etc/systemd/system/nginx.service

# MANUAL DROP-IN
mkdir -p /etc/systemd/system/nginx.service.d/

cat > /etc/systemd/system/nginx.service.d/override.conf <<'EOF'
[Service]
# Increase file limits
LimitNOFILE=65536

# Add environment
Environment="NGINX_WORKER_PROCESSES=4"

# Change restart behavior
RestartSec=10
EOF

systemctl daemon-reload
systemctl restart nginx

# VIEW EFFECTIVE CONFIGURATION
systemctl cat nginx                      # Shows all pieces

# COMMON OVERRIDES
# Drop-ins belong in <unit>.service.d/. A bare "service.d/" directory is a
# type-level drop-in that systemd applies to EVERY service on the host, so
# always spell out the unit name. Replace "myapp" with the unit to override.
dropin_dir=/etc/systemd/system/myapp.service.d
mkdir -p "$dropin_dir"

# Memory limits
cat > "$dropin_dir/limits.conf" <<'EOF'
[Service]
MemoryMax=1G
CPUQuota=100%
EOF

# Environment file
cat > "$dropin_dir/env.conf" <<'EOF'
[Service]
EnvironmentFile=/etc/myapp/env
EOF

# Run as different user
cat > "$dropin_dir/user.conf" <<'EOF'
[Service]
User=appuser
Group=appgroup
EOF

# Afterwards: systemctl daemon-reload, then restart the unit

# CLEAR OVERRIDE
rm -rf /etc/systemd/system/nginx.service.d/
systemctl daemon-reload

# INFRASTRUCTURE: Vault memory limit
mkdir -p /etc/systemd/system/vault.service.d/
cat > /etc/systemd/system/vault.service.d/limits.conf <<'EOF'
[Service]
MemoryMax=2G
MemoryHigh=1536M
CPUQuota=200%
LimitNOFILE=65536
EOF

systemctl daemon-reload
systemctl restart vault

Systemd Troubleshooting

# SERVICE WON'T START
# 1. Check status
systemctl status nginx

# 2. Check recent logs
journalctl -u nginx -n 50 --no-pager

# 3. Check syntax
systemd-analyze verify /etc/systemd/system/myapp.service

# 4. Test ExecStart manually
/usr/bin/nginx -t                        # Test as you would run it

# DEPENDENCY ISSUES
# What does service need?
systemctl list-dependencies nginx

# What failed before it?
journalctl -b -p err                     # Errors this boot

# MASKED SERVICE
systemctl unmask bluetooth               # If masked

# FILE PERMISSIONS
# ExecStart binary must be executable
# WorkingDirectory must exist
# User must exist

# SERVICE KEEPS RESTARTING
# Check StartLimitBurst
systemctl show nginx | grep -E 'StartLimit|Restart'

# Reset failure counter
systemctl reset-failed nginx

# SERVICE TYPE MISMATCH
# Forking daemon with Type=simple will fail
# Check if process forks. glibc implements fork() with the clone/clone3
# syscalls, so match those as well as fork/vfork:
strace -f /usr/bin/daemon 2>&1 | grep -E 'fork|clone'

# COMMON ERRORS
# "Failed to start" - Check ExecStart path and permissions
# "code=exited, status=1" - Application error, check app logs
# "code=killed, signal=SEGV" - Application crashed
# "code=killed, signal=KILL" - OOM killer or manual kill

# DEBUG MODE
SYSTEMD_LOG_LEVEL=debug systemctl start nginx

# CHECK UNIT FILE PATHS
systemctl show nginx -p FragmentPath    # Where's the unit file?

# DEPENDENCY ORDERING
systemctl list-dependencies --after nginx   # What must start before
systemctl list-dependencies --before nginx  # What waits for this

# INFRASTRUCTURE: Debug service start
systemctl stop myapp
journalctl -f &                          # Watch logs in background
log_pid=$!                               # Remember the follower's PID
systemctl start myapp
# Ctrl+C does not reach a background job; when done watching, stop it with:
kill "$log_pid"

Systemd Gotchas

# WRONG: Editing /usr/lib/systemd/system/ files
vim /usr/lib/systemd/system/nginx.service  # Will be overwritten!

# CORRECT: Use override files
systemctl edit nginx                     # Creates drop-in

# WRONG: Forgetting daemon-reload
vim /etc/systemd/system/myapp.service
systemctl restart myapp                  # Uses old config!

# CORRECT: Reload first
systemctl daemon-reload
systemctl restart myapp

# WRONG: Type=simple for forking daemon
[Service]
Type=simple
ExecStart=/usr/sbin/traditional-daemon   # Forks and exits!
# systemd thinks it crashed immediately

# CORRECT: Use Type=forking
[Service]
Type=forking
PIDFile=/run/daemon.pid
ExecStart=/usr/sbin/traditional-daemon

# WRONG: ExecStart with shell features
ExecStart=echo hello && echo world       # Won't work

# CORRECT: Use shell explicitly
ExecStart=/bin/sh -c 'echo hello && echo world'

# WRONG: Relative paths
ExecStart=myapp                          # Won't find it

# CORRECT: Absolute paths
ExecStart=/usr/local/bin/myapp

# WRONG: User service without lingering
systemctl --user enable myapp            # Stops on logout

# CORRECT: Enable lingering
loginctl enable-linger username
systemctl --user enable myapp

# WRONG: Timer without matching service
# backup.timer exists but backup.service doesn't

# CORRECT: Create both
# timer activates service with same name (minus .timer/.service)

# WRONG: Assuming immediate timer execution
systemctl enable backup.timer            # Doesn't run now

# CORRECT: Enable AND start
systemctl enable --now backup.timer

# NOT A PROBLEM: Quotes in an EnvironmentFile
# systemd strips matching quotes around a value, so both lines below set
# VAR to: value with spaces
# /etc/myapp/env
VAR="value with spaces"
VAR=value with spaces

# WRONG: Treating the EnvironmentFile as a shell script
# - No expansion: $OTHER_VAR and $(cmd) stay literal text
# - No trailing comments: "#" starts a comment only at the beginning of a line

Quick Reference

# SERVICE CONTROL
systemctl start/stop/restart svc
systemctl enable/disable svc
systemctl enable --now svc               # Enable + start
systemctl status svc
systemctl cat svc                        # Show unit file

# LOGS
journalctl -u svc -f                     # Follow logs
journalctl -u svc --since "1 hour ago"
journalctl -u svc -p err                 # Errors and above (err, crit, alert, emerg)
journalctl --vacuum-size=500M            # Cleanup

# TIMERS
systemctl list-timers
systemd-analyze calendar "daily"         # Test syntax

# CUSTOM UNITS
/etc/systemd/system/myapp.service
systemctl daemon-reload                  # After changes
systemctl edit svc                       # Override

# USER UNITS
~/.config/systemd/user/
systemctl --user start/enable svc
loginctl enable-linger user

# DEBUG
systemctl --failed                       # Failed units
systemd-analyze blame                    # Boot time
journalctl -b -p err                     # Boot errors

# COMMON UNIT SECTIONS
[Unit]
Description=My Service
After=network.target

[Service]
Type=simple
ExecStart=/path/to/binary
Restart=always
User=appuser

[Install]
WantedBy=multi-user.target