Cryptographic Hashing

Hash functions for integrity verification and checksums.

Hash Fundamentals

# HASH PROPERTIES
# 1. Deterministic: same input = same output
# 2. One-way: cannot reverse hash to input
# 3. Collision-resistant: hard to find two inputs with same hash
# 4. Avalanche effect: small input change = completely different hash

# HASH ALGORITHMS (by security level)
# MD5:     128-bit - BROKEN (collisions found) - DO NOT USE for security
# SHA-1:   160-bit - BROKEN (practical collision published: SHAttered, 2017) - DO NOT USE for security
# SHA-256: 256-bit - STRONG - Recommended
# SHA-384: 384-bit - STRONG - Extra security margin
# SHA-512: 512-bit - STRONG - Highest security, faster on 64-bit
# BLAKE2:  256/512-bit - STRONG - Faster than SHA, modern

# BASIC HASHING
echo -n "text" | sha256sum
# -n is CRITICAL! Without it, includes newline in hash
# Output: sha256-hash  -

# HASH FILE
sha256sum file.txt
# Output: sha256-hash  file.txt

sha256sum file.txt | awk '{print $1}'
# Just the hash

# MULTIPLE FILES
sha256sum *.txt
sha256sum -b *.bin                       # Binary mode (Windows compat)

# DIFFERENT ALGORITHMS
md5sum file.txt                          # 128-bit (legacy only)
sha1sum file.txt                         # 160-bit (legacy)
sha256sum file.txt                       # 256-bit (recommended)
sha384sum file.txt                       # 384-bit
sha512sum file.txt                       # 512-bit

# COMPARE HASHES
sha256sum file.txt
# Expected: abc123...
# Actual: abc123...
# Match!

# OPENSSL ALTERNATIVE
openssl dgst -sha256 file.txt
openssl dgst -sha512 file.txt
openssl dgst -md5 file.txt               # Legacy

# PIPELINE HASHING
cat file.txt | sha256sum
tar czf - directory/ | sha256sum         # Hash archive without saving

File Integrity Verification

# GENERATE CHECKSUMS FILE
sha256sum *.tar.gz > SHA256SUMS
sha256sum -b *.iso >> SHA256SUMS         # Append binary files

# CHECKSUMS FILE FORMAT
# abc123...  file1.tar.gz
# def456...  file2.tar.gz
# Note: two spaces between hash and filename

# VERIFY CHECKSUMS
sha256sum -c SHA256SUMS
# Output:
# file1.tar.gz: OK
# file2.tar.gz: OK

# STRICT VERIFICATION
sha256sum -c --strict SHA256SUMS
# Fails on improperly formatted lines

# QUIET MODE (only show failures)
sha256sum -c --quiet SHA256SUMS

# STATUS MODE (no output, just exit code)
sha256sum -c --status SHA256SUMS
echo $?  # 0 = all OK, 1 = at least one failure

# IGNORE MISSING FILES
sha256sum -c --ignore-missing SHA256SUMS

# VERIFY SINGLE FILE
echo "abc123...  file.txt" | sha256sum -c
# Or:
echo "abc123... *file.txt" | sha256sum -c -  # Binary mode marker

# DIRECTORY INTEGRITY
# Generate recursive checksums
find /path/to/dir -type f -exec sha256sum {} \; > dir-checksums.txt

# Verify later
sha256sum -c dir-checksums.txt

# WITH AWK FOR FORMATTING
find . -type f -name "*.adoc" -exec sha256sum {} \; | \
    awk '{printf "%-64s %s\n", $1, $2}'

# HASH ENTIRE DIRECTORY (order-independent)
find /path/to/dir -type f -exec sha256sum {} \; | sort | sha256sum
# Single hash representing entire directory state

# INFRASTRUCTURE PATTERNS

# Verify backup integrity
sha256sum /mnt/backups/ise-backup-2024-*.tar.gz > /mnt/backups/SHA256SUMS

# Verify before restore
cd /mnt/backups
sha256sum -c SHA256SUMS

# Hash configuration files
sha256sum /etc/sssd/sssd.conf /etc/krb5.conf > /root/config-hashes.txt

# Detect configuration drift
sha256sum -c /root/config-hashes.txt
# If any fail, config was modified

Password Hashing

# PASSWORD HASHING != REGULAR HASHING
# Password hashes need:
# 1. Salt (prevents rainbow tables)
# 2. Iteration/work factor (slows brute force)
# 3. Memory-hard (resists GPU attacks)

# ALGORITHMS (ranked by security)
# Argon2id: BEST - memory-hard, modern standard
# bcrypt:   GOOD - time-tested, widely supported
# scrypt:   GOOD - memory-hard
# PBKDF2:   ACCEPTABLE - many iterations required
# SHA-512 + salt: WEAK - too fast, use only if no alternative
# Plain SHA-256: BROKEN - rainbow tables exist

# LINUX PASSWORD HASHING

# SHA-512 (most common on Linux)
openssl passwd -6 "password"
# Output: $6$salt$hash...
# $6$ = SHA-512
# $5$ = SHA-256

# With explicit salt
openssl passwd -6 -salt "randomsalt" "password"

# Using mkpasswd (from whois package)
mkpasswd -m sha-512 "password"
mkpasswd -m sha-256 "password"
mkpasswd -m bcrypt "password"            # If available

# PYTHON PASSWORD HASHING

# bcrypt (pip install bcrypt)
python3 << 'EOF'
import bcrypt
password = b"secret123"
salt = bcrypt.gensalt(rounds=12)  # 2^12 iterations
hashed = bcrypt.hashpw(password, salt)
print(hashed.decode())

# Verify
if bcrypt.checkpw(password, hashed):
    print("Match!")
EOF

# Argon2 (pip install argon2-cffi)
python3 << 'EOF'
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError

ph = PasswordHasher()
# Named "hashed" rather than "hash" so the built-in hash() is not shadowed.
hashed = ph.hash("secret123")
print(hashed)

# Verify
# verify() raises VerifyMismatchError when the password is wrong. Catch only
# that: a bare "except:" would also turn a corrupt hash, a typo in this code,
# or Ctrl-C into a misleading "No match".
try:
    ph.verify(hashed, "secret123")
    print("Match!")
except VerifyMismatchError:
    print("No match")
EOF

# PBKDF2
python3 << 'EOF'
import hashlib
import os
password = "secret123"
salt = os.urandom(16)
iterations = 600000  # OWASP 2023 recommendation

dk = hashlib.pbkdf2_hmac('sha256', password.encode(), salt, iterations)
print(f"Salt: {salt.hex()}")
print(f"Hash: {dk.hex()}")
EOF

# HTPASSWD (Apache/Nginx basic auth)
# -n prints the generated entry on stdout instead of updating a password file.
htpasswd -nB username                    # bcrypt (recommended)
htpasswd -n5 username                    # SHA-256 crypt (-5); SHA-512 crypt is -2
htpasswd -nm username                    # MD5 (legacy)

# Add to file
htpasswd -B /etc/nginx/.htpasswd username

# INFRASTRUCTURE PATTERNS

# Generate password for cloud-init
mkpasswd -m sha-512 "vm-password" > /tmp/cloud-init-pass.txt

# Verify user password hash format
sudo awk -F: '/evanusmodestus/{print $2}' /etc/shadow | head -c 3
# $6$ = SHA-512 (good)
# $5$ = SHA-256 (acceptable)
# $1$ = MD5 (upgrade immediately!)

# Check password hash strength
sudo awk -F: '{
    if ($2 ~ /^\$6\$/) algo="SHA-512 (good)"
    else if ($2 ~ /^\$5\$/) algo="SHA-256 (ok)"
    else if ($2 ~ /^\$y\$/) algo="yescrypt (best)"
    else if ($2 ~ /^\$1\$/) algo="MD5 (BAD!)"
    else algo="unknown"
    print $1": "algo
}' /etc/shadow

HMAC (Hash-based Message Authentication)

# HMAC = Hash + Secret Key
# Verifies both integrity AND authenticity
# Used in: API signatures, JWT, webhooks, secure cookies

# HMAC GENERATION (OpenSSL)
echo -n "message" | openssl dgst -sha256 -hmac "secret-key"
# Output: HMAC-SHA256(stdin)= abc123...

# Just the hash
echo -n "message" | openssl dgst -sha256 -hmac "secret-key" | awk '{print $2}'

# HMAC WITH BINARY KEY
openssl dgst -sha256 -mac HMAC -macopt hexkey:$(xxd -p -c 64 key.bin) file.txt

# HMAC IN PYTHON
python3 << 'EOF'
import hmac
import hashlib

key = b"secret-key"
message = b"message to authenticate"

signature = hmac.new(key, message, hashlib.sha256).hexdigest()
print(f"HMAC: {signature}")

# Verify (constant-time comparison!)
expected = signature
if hmac.compare_digest(signature, expected):
    print("Valid!")
EOF

# WEBHOOK SIGNATURE VERIFICATION
# Many services (GitHub, Stripe) sign webhooks with HMAC

# GitHub webhook verification
PAYLOAD='{"action":"push"}'
SECRET="webhook-secret"
EXPECTED_SIG="sha256=abc123..."

COMPUTED=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$SECRET" | awk '{print "sha256="$2}')
if [ "$COMPUTED" = "$EXPECTED_SIG" ]; then
    echo "Valid webhook"
fi

# AWS SIGNATURE V4 (simplified)
# AWS uses HMAC chains for request signing: the digest of one step is the key
# of the next. Those intermediate keys are raw bytes, which a shell variable
# cannot hold (NUL bytes are dropped, trailing newlines are stripped), so the
# chain is carried as hex and handed back to openssl with -macopt hexkey:.

# hmac_sha256_hex KEY_HEX MESSAGE
#   Prints HMAC-SHA256 of MESSAGE as hex.
#   KEY_HEX is the key, hex-encoded; MESSAGE is hashed byte-for-byte.
hmac_sha256_hex() {
    printf '%s' "$2" |
        openssl dgst -sha256 -mac HMAC -macopt "hexkey:$1" |
        awk '{print $NF}'   # the digest is the last field, whatever the label looks like
}

# First key in the chain: the literal text "AWS4" + secret access key, as hex.
SECRET_KEY_HEX=$(printf '%s' "AWS4${SECRET_KEY}" | od -An -v -tx1 | tr -d ' \n')
DATE_KEY=$(hmac_sha256_hex "$SECRET_KEY_HEX" "20240115")
REGION_KEY=$(hmac_sha256_hex "$DATE_KEY" "us-east-1")
# ... continues

# JWT SIGNATURE (HMAC-SHA256)
# Header.Payload signed with secret
HEADER_PAYLOAD="eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
SECRET="jwt-secret"
SIGNATURE=$(echo -n "$HEADER_PAYLOAD" | openssl dgst -sha256 -hmac "$SECRET" -binary | base64 | tr '+/' '-_' | tr -d '=')
echo "JWT: ${HEADER_PAYLOAD}.${SIGNATURE}"

# INFRASTRUCTURE PATTERNS

# Sign backup manifests
# Manifest line format: "<sha256 of file> <HMAC of that sha256> <file name>"
BACKUP_FILE="ise-backup-2024-01-15.tar.gz"
# Fetch the key once and refuse to continue without it: an HMAC computed with
# an empty key still looks like a valid signature.
if SIGNING_KEY=$(gopass show -o v3/domains/d000/backup/signing-key) && [ -n "$SIGNING_KEY" ]; then
    BACKUP_HASH=$(sha256sum "$BACKUP_FILE" | awk '{print $1}')
    HMAC_SIG=$(echo -n "$BACKUP_HASH" | openssl dgst -sha256 -hmac "$SIGNING_KEY" | awk '{print $2}')
    echo "$BACKUP_HASH $HMAC_SIG $BACKUP_FILE" >> backup-manifest.txt
else
    echo "Cannot read signing key; manifest not updated" >&2
fi

# Verify backup
# The key is read once, outside the loop, instead of once per manifest line.
if SIGNING_KEY=$(gopass show -o v3/domains/d000/backup/signing-key) && [ -n "$SIGNING_KEY" ]; then
    # read -r keeps backslashes literal; the file name is the last field, so
    # it receives the rest of the line even when it contains spaces.
    while read -r hash sig file; do
        computed_sig=$(echo -n "$hash" | openssl dgst -sha256 -hmac "$SIGNING_KEY" | awk '{print $2}')
        if [ "$computed_sig" = "$sig" ]; then
            echo "$file: signature valid"
            # Only trust the recorded hash once its signature has checked out.
            sha256sum -c <<< "$hash  $file" && echo "$file: integrity OK"
        else
            echo "$file: SIGNATURE INVALID!"
        fi
    done < backup-manifest.txt
else
    echo "Cannot read signing key; manifest not verified" >&2
fi

Hash Verification Patterns

# DOWNLOAD VERIFICATION

# Typical pattern:
# 1. Download file
# 2. Download checksum file
# 3. Verify

curl -LO https://example.com/file.tar.gz
curl -LO https://example.com/file.tar.gz.sha256

sha256sum -c file.tar.gz.sha256

# OR if checksum is inline
EXPECTED="abc123..."
ACTUAL=$(sha256sum file.tar.gz | awk '{print $1}')
if [ "$EXPECTED" = "$ACTUAL" ]; then
    echo "Verified!"
else
    echo "HASH MISMATCH!"
    exit 1
fi

# GPG-SIGNED CHECKSUMS
# Some projects sign their checksum files
gpg --verify SHA256SUMS.sig SHA256SUMS
sha256sum -c SHA256SUMS

# ISO VERIFICATION (example: Rocky Linux)
curl -LO https://download.rockylinux.org/pub/rocky/9/isos/x86_64/Rocky-9.3-x86_64-minimal.iso
curl -LO https://download.rockylinux.org/pub/rocky/9/isos/x86_64/CHECKSUM

# Verify signature
gpg --keyserver keys.openpgp.org --recv-keys 702D426D350D275D
gpg --verify CHECKSUM

# Verify ISO
sha256sum -c --ignore-missing CHECKSUM

# HASH COMPARISON PATTERNS

# Compare two files without storing hashes
if [ "$(sha256sum file1 | awk '{print $1}')" = "$(sha256sum file2 | awk '{print $1}')" ]; then
    echo "Files identical"
fi

# More efficient for large files (stops at first difference)
cmp -s file1 file2 && echo "Identical" || echo "Different"

# Compare directories
# dir_hash DIR
#   Prints a single sha256sum line summarising every regular file under DIR.
#   The listing is built from inside DIR, so paths are relative ("./a/b") and
#   two trees with the same relative paths and contents give the same line.
#   (Hashing "dir1/..." and "dir2/..." paths would make the two results
#   differ even for identical trees.)
#   Returns non-zero, with a message on stderr, if DIR is not a directory.
dir_hash() {
    [ -d "$1" ] || { echo "dir_hash: not a directory: $1" >&2; return 1; }
    (
        cd -- "$1" || exit 1
        # LC_ALL=C gives the same sort order in every locale.
        find . -type f -exec sha256sum {} + | LC_ALL=C sort | sha256sum
    )
}

if HASH1=$(dir_hash dir1) && HASH2=$(dir_hash dir2) && [ "$HASH1" = "$HASH2" ]; then
    echo "Directories match"
fi

# INFRASTRUCTURE VERIFICATION

# Verify Antora build output
cd ~/atelier/_bibliotheca/domus-docs/build/site
find . -type f -name "*.html" -exec sha256sum {} \; | sort > /tmp/build-hashes.txt
# Compare with previous build to detect changes

# Verify certificate files
sha256sum /etc/ssl/certs/modestus-*.pem > /root/cert-hashes.txt
# Later:
sha256sum -c /root/cert-hashes.txt

# Verify VM image before deployment
EXPECTED="abc123..."
ACTUAL=$(sha256sum Rocky-9-GenericCloud.qcow2 | awk '{print $1}')
[ "$EXPECTED" = "$ACTUAL" ] || { echo "Image corrupted!"; exit 1; }

# Config drift detection
# report_drift
#   Reads "HASH HOST" lines on stdin. The first line with a real hash is the
#   baseline; prints "DRIFT: HOST differs from BASELINE" for every later host
#   whose hash is different, and "UNREACHABLE: HOST" for every line whose
#   hash field is "-".
report_drift() {
    awk '
        $1 == "-"              { print "UNREACHABLE: " $2; next }
        base_host == ""        { base_hash = $1 ""; base_host = $2; next }
        # ($1 "") forces a string comparison even if a hash looks numeric
        ($1 "") != base_hash   { print "DRIFT: " $2 " differs from " base_host }
    '
}

for host in vault-01 ise-01 bind-01; do
    # BatchMode/ConnectTimeout: fail fast instead of hanging on a password
    # prompt or a dead host.
    hash=$(ssh -o BatchMode=yes -o ConnectTimeout=5 "$host" \
        "sha256sum /etc/ssh/sshd_config" 2>/dev/null | awk '{print $1}')
    # Tag the hash with the host name: the remote path is the same on every
    # host, so it cannot say which machine drifted. "-" marks "no answer".
    printf '%s %s\n' "${hash:--}" "$host"
done | report_drift

Advanced Hashing Techniques

# HASH TREES (Merkle Trees)
# Used in: Git, blockchain, IPFS, ZFS
# Allows verifying parts without full data

# Simple implementation
# hash_file FILE: print only the hex SHA-256 digest of FILE.
hash_file() {
    sha256sum "$1" | cut -d ' ' -f 1
}

# hash_pair LEFT RIGHT: digest of the two child digests joined as hex text.
hash_pair() {
    printf '%s%s' "$1" "$2" | sha256sum | cut -d ' ' -f 1
}

# Leaves: one digest per file
H1=$(hash_file file1.txt)
H2=$(hash_file file2.txt)
H3=$(hash_file file3.txt)
H4=$(hash_file file4.txt)

# Inner nodes: each parent is the hash of its two children
H12=$(hash_pair "$H1" "$H2")
H34=$(hash_pair "$H3" "$H4")

# Root: the hash of the two inner nodes
ROOT=$(hash_pair "$H12" "$H34")
printf 'Root hash: %s\n' "$ROOT"

# KEYED HASHING WITHOUT HMAC
# Sometimes need simpler construction

# Prefix key (less secure than HMAC: SHA-256(key + message) is open to length-extension attacks)
echo -n "secret-key:message" | sha256sum

# HKDF (HMAC-based Key Derivation)
# Expand short secret into multiple keys
python3 << 'EOF'
import hashlib
import hmac

def hkdf_expand(prk, info, length):
    """HKDF expand phase (simplified)"""
    hash_len = 32  # SHA-256
    n = (length + hash_len - 1) // hash_len
    okm = b""
    t = b""
    for i in range(1, n + 1):
        t = hmac.new(prk, t + info + bytes([i]), hashlib.sha256).digest()
        okm += t
    return okm[:length]

# Example: derive encryption and auth keys from master
master = b"master-secret"
prk = hmac.new(b"salt", master, hashlib.sha256).digest()
encryption_key = hkdf_expand(prk, b"encryption", 32)
auth_key = hkdf_expand(prk, b"authentication", 32)
print(f"Encryption key: {encryption_key.hex()}")
print(f"Auth key: {auth_key.hex()}")
EOF

# HASH-BASED COMMITMENT
# Commit to value without revealing it

# Commit phase
SECRET="my-secret-value"
NONCE=$(openssl rand -hex 16)
COMMITMENT=$(echo -n "${NONCE}:${SECRET}" | sha256sum | awk '{print $1}')
echo "Commitment: $COMMITMENT"  # Share this

# Reveal phase (later)
echo "Nonce: $NONCE"
echo "Secret: $SECRET"
# Verifier computes: sha256("${NONCE}:${SECRET}") and compares

# HASH CHAINS
# Every link hashes "<previous digest>:<payload>", so the digest of a block
# depends on all blocks before it (blockchain-like).

# sha256_hex STRING: hex SHA-256 of STRING, with no newline appended.
sha256_hex() {
    printf '%s' "$1" | sha256sum | cut -d ' ' -f 1
}

GENESIS="genesis-block"
PREV=$(sha256_hex "$GENESIS")
printf 'Block 0: %s\n' "$PREV"

for i in {1..5}; do
    DATA="block-${i}-data"
    CURRENT=$(sha256_hex "${PREV}:${DATA}")
    printf 'Block %s: %s\n' "$i" "$CURRENT"
    PREV=$CURRENT
done

# HASH AS DEDUPLICATION KEY
# Used in backup systems (borg, restic)

# Generate content-addressed name
FILE="document.pdf"
HASH=$(sha256sum "$FILE" | awk '{print $1}')
EXT="${FILE##*.}"
# Store as: objects/ab/c123...def.pdf (first 2 chars as directory)
mkdir -p "objects/${HASH:0:2}"
cp "$FILE" "objects/${HASH:0:2}/${HASH}.${EXT}"

Hashing Gotchas

# WRONG: Forgetting -n with echo
echo "password" | sha256sum
# Includes newline! Different hash than "password"

# CORRECT: Use -n
echo -n "password" | sha256sum

# WRONG: Using MD5 for security
md5sum important-file.txt > checksums.md5
# MD5 has known collisions!

# CORRECT: Use SHA-256 minimum
sha256sum important-file.txt > checksums.sha256

# WRONG: Plain hash for passwords
echo -n "password123" | sha256sum
# Rainbow tables exist for common passwords!

# CORRECT: Use password hashing algorithm
mkpasswd -m sha-512 "password123"
# Includes salt, proper algorithm

# WRONG: Comparing hashes with ==
if [ "$(sha256sum file | awk '{print $1}')" == "$EXPECTED" ]; then
# Timing attack possible (short-circuits on first difference)

# CORRECT: Constant-time comparison (in Python)
import hmac
hmac.compare_digest(hash1, hash2)
# Or for bash, just use string comparison (less critical for non-crypto)

# WRONG: Trusting user-provided hash
USER_HASH="abc123..."
# User could provide hash of malicious file!

# CORRECT: Always compute hash yourself
COMPUTED=$(sha256sum downloaded-file | awk '{print $1}')
[ "$COMPUTED" = "$EXPECTED_FROM_TRUSTED_SOURCE" ]

# WRONG: Hash file then modify
sha256sum config.txt > hashes.txt
# Edit config.txt...
sha256sum -c hashes.txt  # Now fails!

# CORRECT: Hash after final version
# Make all changes first, then:
sha256sum config.txt > hashes.txt

# WRONG: Assuming same hash = same file
# Hash collisions exist (especially MD5)

# CORRECT: For critical files, use multiple hashes
sha256sum file.txt
sha512sum file.txt
# Collision in both is astronomically unlikely

# WRONG: Hashing encrypted data for integrity
# Encrypt then hash = attacker can modify ciphertext undetected

# CORRECT: Use authenticated encryption (AEAD)
# Or: HMAC the ciphertext with different key than encryption

# WRONG: Short HMAC key
echo -n "msg" | openssl dgst -sha256 -hmac "key"
# Key shorter than hash output = weaker security

# CORRECT: HMAC key at least as long as hash output
# Keep the key in a variable (and store it somewhere safe): a key generated
# inline inside the command is thrown away, so the MAC could never be checked.
HMAC_KEY=$(openssl rand -hex 32)
# hexkey: takes the key as hex, so the HMAC key is the 32 random bytes
# themselves rather than their 64-character hex spelling.
echo -n "msg" | openssl dgst -sha256 -mac HMAC -macopt "hexkey:$HMAC_KEY"
# 32 bytes = 256 bits, matches SHA-256

Quick Reference

# BASIC HASHING
echo -n "text" | sha256sum              # String (NO newline!)
sha256sum file.txt                      # File
sha256sum *.txt > SHA256SUMS            # Generate checksums
sha256sum -c SHA256SUMS                 # Verify checksums

# ALGORITHMS
md5sum file.txt                         # 128-bit (AVOID)
sha1sum file.txt                        # 160-bit (legacy)
sha256sum file.txt                      # 256-bit (recommended)
sha512sum file.txt                      # 512-bit (highest)

# PASSWORD HASHING
openssl passwd -6 "password"            # SHA-512 Linux format
mkpasswd -m sha-512 "password"          # Alternative
mkpasswd -m bcrypt "password"           # bcrypt (if available)

# HMAC
echo -n "msg" | openssl dgst -sha256 -hmac "key"
# Output: HMAC-SHA256(stdin)= abc123...

# VERIFICATION
sha256sum -c checksums.txt              # Check all
sha256sum -c --quiet checksums.txt      # Only show failures
sha256sum -c --status checksums.txt     # Exit code only

# FILE COMPARISON
cmp -s file1 file2 && echo "Same"       # Byte-by-byte (faster)
# Hash comparison. The files are read on stdin so both output lines end in
# "-"; with the file names in the output the two lines would always differ
# and diff would report a difference even for identical files.
diff <(sha256sum < file1) <(sha256sum < file2)

# DIRECTORY HASHING
find dir/ -type f -exec sha256sum {} \; | sort | sha256sum
# Single hash for entire directory

# COMMON OUTPUTS
# MD5:    32 hex chars (128 bits)
# SHA-1:  40 hex chars (160 bits)
# SHA-256: 64 hex chars (256 bits)
# SHA-512: 128 hex chars (512 bits)