Cryptographic Hashing
Hash functions for integrity verification and checksums.
Hash Fundamentals
# HASH PROPERTIES
# 1. Deterministic: same input = same output
# 2. One-way: cannot reverse hash to input
# 3. Collision-resistant: hard to find two inputs with same hash
# 4. Avalanche effect: small input change = completely different hash
# HASH ALGORITHMS (by security level)
# MD5: 128-bit - BROKEN (collisions found) - DO NOT USE for security
# SHA-1: 160-bit - BROKEN (practical collisions demonstrated: SHAttered, 2017) - Avoid
# SHA-256: 256-bit - STRONG - Recommended
# SHA-384: 384-bit - STRONG - Extra security margin
# SHA-512: 512-bit - STRONG - Highest security, faster on 64-bit
# BLAKE2: 256/512-bit - STRONG - Faster than SHA, modern
# BASIC HASHING
echo -n "text" | sha256sum
# -n is CRITICAL! Without it, includes newline in hash
# Output: sha256-hash -
# HASH FILE
sha256sum file.txt
# Output: sha256-hash file.txt
sha256sum file.txt | awk '{print $1}'
# Just the hash
# MULTIPLE FILES
sha256sum *.txt
sha256sum -b *.bin # Binary mode (Windows compat)
# DIFFERENT ALGORITHMS
md5sum file.txt # 128-bit (legacy only)
sha1sum file.txt # 160-bit (legacy)
sha256sum file.txt # 256-bit (recommended)
sha384sum file.txt # 384-bit
sha512sum file.txt # 512-bit
# COMPARE HASHES
sha256sum file.txt
# Expected: abc123...
# Actual: abc123...
# Match!
# OPENSSL ALTERNATIVE
openssl dgst -sha256 file.txt
openssl dgst -sha512 file.txt
openssl dgst -md5 file.txt # Legacy
# PIPELINE HASHING
cat file.txt | sha256sum
tar czf - directory/ | sha256sum # Hash archive without saving
File Integrity Verification
# GENERATE CHECKSUMS FILE
sha256sum *.tar.gz > SHA256SUMS
sha256sum -b *.iso >> SHA256SUMS # Append binary files
# CHECKSUMS FILE FORMAT
# abc123...  file1.tar.gz
# def456...  file2.tar.gz
# Note: two spaces between hash and filename (text mode); binary mode is one space plus "*"
# VERIFY CHECKSUMS
sha256sum -c SHA256SUMS
# Output:
# file1.tar.gz: OK
# file2.tar.gz: OK
# STRICT VERIFICATION
sha256sum -c --strict SHA256SUMS
# Fails on improperly formatted lines
# QUIET MODE (only show failures)
sha256sum -c --quiet SHA256SUMS
# STATUS MODE (no output, just exit code)
sha256sum -c --status SHA256SUMS
echo $? # 0 = all OK, 1 = at least one failure
# IGNORE MISSING FILES
sha256sum -c --ignore-missing SHA256SUMS
# VERIFY SINGLE FILE
echo "abc123... file.txt" | sha256sum -c
# Or:
echo "abc123... *file.txt" | sha256sum -c - # Binary mode marker
# DIRECTORY INTEGRITY
# Generate recursive checksums
find /path/to/dir -type f -exec sha256sum {} \; > dir-checksums.txt
# Verify later
sha256sum -c dir-checksums.txt
# WITH AWK FOR FORMATTING
find . -type f -name "*.adoc" -exec sha256sum {} \; | \
awk '{printf "%-64s %s\n", $1, $2}'
# HASH ENTIRE DIRECTORY (order-independent)
find /path/to/dir -type f -exec sha256sum {} \; | sort | sha256sum
# Single hash representing entire directory state
# INFRASTRUCTURE PATTERNS
# Verify backup integrity
sha256sum /mnt/backups/ise-backup-2024-*.tar.gz > /mnt/backups/SHA256SUMS
# Verify before restore
cd /mnt/backups
sha256sum -c SHA256SUMS
# Hash configuration files
sha256sum /etc/sssd/sssd.conf /etc/krb5.conf > /root/config-hashes.txt
# Detect configuration drift
sha256sum -c /root/config-hashes.txt
# If any fail, config was modified
Password Hashing
# PASSWORD HASHING != REGULAR HASHING
# Password hashes need:
# 1. Salt (prevents rainbow tables)
# 2. Iteration/work factor (slows brute force)
# 3. Memory-hard (resists GPU attacks)
# ALGORITHMS (ranked by security)
# Argon2id: BEST - memory-hard, modern standard
# bcrypt: GOOD - time-tested, widely supported
# scrypt: GOOD - memory-hard
# PBKDF2: ACCEPTABLE - many iterations required
# SHA-512 + salt: WEAK - too fast, use only if no alternative
# Plain SHA-256: BROKEN - rainbow tables exist
# LINUX PASSWORD HASHING
# SHA-512 (most common on Linux)
openssl passwd -6 "password"
# Output: $6$salt$hash...
# $6$ = SHA-512
# $5$ = SHA-256
# With explicit salt
openssl passwd -6 -salt "randomsalt" "password"
# Using mkpasswd (from whois package)
mkpasswd -m sha-512 "password"
mkpasswd -m sha-256 "password"
mkpasswd -m bcrypt "password" # If available
# PYTHON PASSWORD HASHING
# bcrypt (pip install bcrypt)
python3 << 'EOF'
import bcrypt

password = b"secret123"
# rounds is the log2 work factor; the generated salt is stored inside the hash.
salt = bcrypt.gensalt(rounds=12)  # 2^12 iterations
hashed = bcrypt.hashpw(password, salt)
print(hashed.decode())

# Verify
if bcrypt.checkpw(password, hashed):
    print("Match!")
EOF
# Argon2 (pip install argon2-cffi)
python3 << 'EOF'
from argon2 import PasswordHasher
from argon2.exceptions import VerifyMismatchError

ph = PasswordHasher()
hashed = ph.hash("secret123")  # named `hashed` so the builtin hash() is not shadowed
print(hashed)

# Verify
try:
    ph.verify(hashed, "secret123")
    print("Match!")
except VerifyMismatchError:
    # Catch only the wrong-password case; a bare `except:` would also hide
    # malformed hashes, typos, and KeyboardInterrupt behind "No match".
    print("No match")
EOF
# PBKDF2
python3 << 'EOF'
import hashlib
import os
password = "secret123"
salt = os.urandom(16)
iterations = 600000 # OWASP 2023 recommendation
dk = hashlib.pbkdf2_hmac('sha256', password.encode(), salt, iterations)
print(f"Salt: {salt.hex()}")
print(f"Hash: {dk.hex()}")
EOF
# HTPASSWD (Apache/Nginx basic auth)
htpasswd -nB username # bcrypt (recommended)
htpasswd -n5 username # SHA-512
htpasswd -nm username # MD5 (legacy)
# Add to file
htpasswd -B /etc/nginx/.htpasswd username
# INFRASTRUCTURE PATTERNS
# Generate password for cloud-init
mkpasswd -m sha-512 "vm-password" > /tmp/cloud-init-pass.txt
# Verify user password hash format
# shadow_hash_scheme USER: reads shadow(5)-format lines on stdin and prints the
# scheme marker (e.g. $6$) of USER's password field.
# The account name is compared exactly ($1 == user): a bare /regex/ would also
# hit other accounts, or hash text, that merely contain the string.
# The id between the first two "$" is printed instead of a fixed 3 bytes, so
# longer markers such as $2b$ (bcrypt) are not cut short.
shadow_hash_scheme() {
  awk -F: -v user="$1" '$1 == user { split($2, part, /[$]/); print "$" part[2] "$" }'
}
sudo cat /etc/shadow | shadow_hash_scheme evanusmodestus
# $y$ = yescrypt (best)
# $2b$ = bcrypt (good)
# $6$ = SHA-512 (good)
# $5$ = SHA-256 (acceptable)
# $1$ = MD5 (upgrade immediately!)
# Check password hash strength
# shadow_hash_report: reads shadow(5)-format lines on stdin and prints
# "<user>: <verdict>" for every account. Entries starting with ! or * are
# reported separately rather than as "unknown".
shadow_hash_report() {
  awk -F: '{
    if ($2 ~ /^\$y\$/) algo="yescrypt (best)"
    else if ($2 ~ /^\$2[abxy]?\$/) algo="bcrypt (good)"
    else if ($2 ~ /^\$6\$/) algo="SHA-512 (good)"
    else if ($2 ~ /^\$5\$/) algo="SHA-256 (ok)"
    else if ($2 ~ /^\$1\$/) algo="MD5 (BAD!)"
    else if ($2 == "") algo="EMPTY PASSWORD (BAD!)"
    else if ($2 ~ /^[!*]/) algo="locked / no password login"
    else algo="unknown"
    print $1": "algo
  }'
}
sudo cat /etc/shadow | shadow_hash_report
HMAC (Hash-based Message Authentication)
# HMAC = Hash + Secret Key
# Verifies both integrity AND authenticity
# Used in: API signatures, JWT, webhooks, secure cookies
# HMAC GENERATION (OpenSSL)
echo -n "message" | openssl dgst -sha256 -hmac "secret-key"
# Output: HMAC-SHA256(stdin)= abc123...
# Just the hash
echo -n "message" | openssl dgst -sha256 -hmac "secret-key" | awk '{print $2}'
# HMAC WITH BINARY KEY
# xxd -p wraps its hex dump over several lines; strip the newlines and quote
# the result so a key of any length reaches openssl as ONE hexkey: argument
# (unquoted, a multi-line dump would be split into separate words).
openssl dgst -sha256 -mac HMAC -macopt "hexkey:$(xxd -p key.bin | tr -d '\n')" file.txt
# HMAC IN PYTHON
python3 << 'EOF'
import hashlib
import hmac

key = b"secret-key"
message = b"message to authenticate"
signature = hmac.new(key, message, hashlib.sha256).hexdigest()
print(f"HMAC: {signature}")

# Verify (constant-time comparison!)
# Demo only: `expected` is a copy of the value just computed. In real use,
# compare the signature received with the message against one recomputed
# locally from the shared key.
expected = signature
if hmac.compare_digest(signature, expected):
    print("Valid!")
EOF
# WEBHOOK SIGNATURE VERIFICATION
# Many services (GitHub, Stripe) sign webhooks with HMAC
# GitHub webhook verification
PAYLOAD='{"action":"push"}'
SECRET="webhook-secret"
EXPECTED_SIG="sha256=abc123..."
COMPUTED=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "$SECRET" | awk '{print "sha256="$2}')
if [ "$COMPUTED" = "$EXPECTED_SIG" ]; then
echo "Valid webhook"
fi
# AWS SIGNATURE V4 (simplified)
# AWS uses HMAC chains for request signing: the raw HMAC output of each stage
# is the key of the next stage.
# Intermediate keys are kept hex-encoded. Raw digests contain arbitrary bytes;
# a shell variable cannot hold NUL bytes and command substitution strips
# trailing newlines, so storing `-binary` output in a variable corrupts the key.
#
# hmac_sha256_hex HEXKEY MESSAGE
#   HEXKEY  - key as hex digits
#   MESSAGE - string to authenticate (no trailing newline is added)
#   Prints the HMAC-SHA256 as lowercase hex.
hmac_sha256_hex() {
  printf '%s' "$2" \
    | openssl dgst -sha256 -mac HMAC -macopt "hexkey:$1" \
    | awk '{print $NF}'
}
# Hex-encode the starting key ("AWS4" + secret access key).
SECRET_HEX=$(printf '%s' "AWS4$SECRET_KEY" | od -An -v -tx1 | tr -d ' \n')
DATE_KEY=$(hmac_sha256_hex "$SECRET_HEX" "20240115")
REGION_KEY=$(hmac_sha256_hex "$DATE_KEY" "us-east-1")
# ... continues
# JWT SIGNATURE (HMAC-SHA256)
# The signed input is "<header>.<payload>"; the signature is the raw HMAC,
# base64url-encoded (URL-safe alphabet, padding removed).
HEADER_PAYLOAD="eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
SECRET="jwt-secret"
SIGNATURE=$(
  printf '%s' "$HEADER_PAYLOAD" \
    | openssl dgst -sha256 -hmac "$SECRET" -binary \
    | base64 \
    | tr -d '=' \
    | tr '+/' '-_'
)
echo "JWT: ${HEADER_PAYLOAD}.${SIGNATURE}"
# INFRASTRUCTURE PATTERNS
# Sign backup manifests
# Manifest line format: "<sha256> <hmac-sha256 of that sha256> <filename>"
#
# backup_sig KEY HASH
#   Prints the hex HMAC-SHA256 of HASH under KEY.
#   Returns 1 (printing nothing on stdout) when KEY is empty or no MAC could
#   be produced. An empty key would still yield an HMAC, i.e. a "signature"
#   anyone can forge, so it is refused here.
backup_sig() {
  local mac
  if [ -z "$1" ]; then
    echo "backup_sig: signing key is empty" >&2
    return 1
  fi
  mac=$(printf '%s' "$2" | openssl dgst -sha256 -hmac "$1" | awk '{print $NF}')
  [ -n "$mac" ] || return 1
  printf '%s\n' "$mac"
}
# Fetch the key once instead of once per manifest line.
SIGNING_KEY=$(gopass show -o v3/domains/d000/backup/signing-key)
BACKUP_FILE="ise-backup-2024-01-15.tar.gz"
BACKUP_HASH=$(sha256sum "$BACKUP_FILE" | awk '{print $1}')
if [ -n "$BACKUP_HASH" ] && HMAC_SIG=$(backup_sig "$SIGNING_KEY" "$BACKUP_HASH"); then
  echo "$BACKUP_HASH $HMAC_SIG $BACKUP_FILE" >> backup-manifest.txt
else
  echo "$BACKUP_FILE: not added to manifest (hash or signature unavailable)" >&2
fi
# Verify backup
# read -r keeps backslashes literal; the last variable takes the rest of the
# line, so filenames containing spaces survive.
while read -r hash sig file; do
  if computed_sig=$(backup_sig "$SIGNING_KEY" "$hash") && [ "$computed_sig" = "$sig" ]; then
    echo "$file: signature valid"
    # Two spaces between hash and name: the canonical checksum-line format.
    sha256sum -c <<< "$hash  $file" && echo "$file: integrity OK"
  else
    echo "$file: SIGNATURE INVALID!"
  fi
done < backup-manifest.txt
Hash Verification Patterns
# DOWNLOAD VERIFICATION
# Typical pattern:
# 1. Download file
# 2. Download checksum file
# 3. Verify
curl -LO https://example.com/file.tar.gz
curl -LO https://example.com/file.tar.gz.sha256
sha256sum -c file.tar.gz.sha256
# OR if checksum is inline
EXPECTED="abc123..."
ACTUAL=$(sha256sum file.tar.gz | awk '{print $1}')
if [ "$EXPECTED" = "$ACTUAL" ]; then
echo "Verified!"
else
echo "HASH MISMATCH!"
exit 1
fi
# GPG-SIGNED CHECKSUMS
# Some projects sign their checksum files
gpg --verify SHA256SUMS.sig SHA256SUMS
sha256sum -c SHA256SUMS
# ISO VERIFICATION (example: Rocky Linux)
curl -LO https://download.rockylinux.org/pub/rocky/9/isos/x86_64/Rocky-9.3-x86_64-minimal.iso
curl -LO https://download.rockylinux.org/pub/rocky/9/isos/x86_64/CHECKSUM
# Verify signature
gpg --keyserver keys.openpgp.org --recv-keys 702D426D350D275D
gpg --verify CHECKSUM
# Verify ISO
sha256sum -c --ignore-missing CHECKSUM
# HASH COMPARISON PATTERNS
# Compare two files without storing hashes
if [ "$(sha256sum file1 | awk '{print $1}')" = "$(sha256sum file2 | awk '{print $1}')" ]; then
echo "Files identical"
fi
# More efficient for large files (stops at first difference)
cmp -s file1 file2 && echo "Identical" || echo "Different"
# Compare directories
# dir_hash DIR
#   Prints one SHA-256 computed over the sorted "<hash>  ./relative/path"
#   listing of every regular file under DIR; returns non-zero if DIR cannot
#   be entered.
#   find runs from INSIDE the directory so the listing holds relative paths.
#   Listing "dir1/..." vs "dir2/..." would make the two results differ even
#   for identical trees. LC_ALL=C makes the sort byte-wise, independent of
#   the caller's locale.
#   Covers file contents and relative names only.
dir_hash() {
  (
    CDPATH= cd -- "$1" || exit
    find . -type f -exec sha256sum {} + | LC_ALL=C sort | sha256sum | awk '{print $1}'
  )
}
if HASH1=$(dir_hash dir1) && HASH2=$(dir_hash dir2) && [ "$HASH1" = "$HASH2" ]; then
  echo "Directories match"
fi
# INFRASTRUCTURE VERIFICATION
# Verify Antora build output
cd ~/atelier/_bibliotheca/domus-docs/build/site
find . -type f -name "*.html" -exec sha256sum {} \; | sort > /tmp/build-hashes.txt
# Compare with previous build to detect changes
# Verify certificate files
sha256sum /etc/ssl/certs/modestus-*.pem > /root/cert-hashes.txt
# Later:
sha256sum -c /root/cert-hashes.txt
# Verify VM image before deployment
EXPECTED="abc123..."
ACTUAL=$(sha256sum Rocky-9-GenericCloud.qcow2 | awk '{print $1}')
[ "$EXPECTED" = "$ACTUAL" ] || { echo "Image corrupted!"; exit 1; }
# Config drift detection
# report_drift
#   Reads "<host> <sha256>" lines on stdin. The first line is the baseline;
#   every later host whose hash differs is reported as
#   "DRIFT: <host> differs from <baseline host>".
#   Exit status: 0 when all hashes agree, 1 when any drift was reported.
report_drift() {
  awk '
    NR == 1 { base_host = $1; base_hash = $2; next }
    $2 != base_hash { print "DRIFT: " $1 " differs from " base_host; drift = 1 }
    END { exit drift }
  '
}
for host in vault-01 ise-01 bind-01; do
  # sha256sum prints the same remote path for every host, so each hash is
  # tagged with the host it came from; otherwise drift cannot be attributed.
  # ssh -n keeps ssh from consuming the caller's stdin.
  remote_hash=$(ssh -n "$host" "sha256sum /etc/ssh/sshd_config" 2>/dev/null | awk '{print $1}')
  if [ -n "$remote_hash" ]; then
    printf '%s %s\n' "$host" "$remote_hash"
  else
    echo "WARN: no hash obtained from $host" >&2
  fi
done | report_drift
Advanced Hashing Techniques
# HASH TREES (Merkle Trees)
# Used in: Git, blockchain, IPFS, ZFS
# Allows verifying parts without full data
# Simple implementation
# NOTE: production Merkle trees (e.g. RFC 6962) prefix leaf and interior-node
# inputs with different bytes so a leaf cannot be passed off as a node; this
# demo omits that and simply concatenates hex digests.
#
# hash_file FILE
#   Prints the SHA-256 hex digest of FILE. Returns non-zero and prints
#   nothing when FILE cannot be read, so a missing leaf is never turned into
#   an empty hash.
hash_file() {
  local digest
  # Reading via stdin keeps the file name out of sha256sum's output.
  digest=$(sha256sum < "$1") || return
  printf '%s\n' "${digest%% *}"
}
# hash_pair LEFT RIGHT
#   Prints the SHA-256 hex digest of the two hex digests concatenated.
hash_pair() {
  printf '%s' "$1$2" | sha256sum | awk '{print $1}'
}
# Hash leaf nodes (files)
if H1=$(hash_file file1.txt) && H2=$(hash_file file2.txt) \
  && H3=$(hash_file file3.txt) && H4=$(hash_file file4.txt); then
  # Hash intermediate nodes
  H12=$(hash_pair "$H1" "$H2")
  H34=$(hash_pair "$H3" "$H4")
  # Root hash
  ROOT=$(hash_pair "$H12" "$H34")
  echo "Root hash: $ROOT"
else
  echo "Merkle tree not built: a leaf file is missing or unreadable" >&2
fi
# KEYED HASHING WITHOUT HMAC
# Sometimes need simpler construction
# Prefix key (less secure than HMAC)
echo -n "secret-key:message" | sha256sum
# HKDF (HMAC-based Key Derivation)
# Expand short secret into multiple keys
python3 << 'EOF'
import hashlib
import hmac


def hkdf_expand(prk, info, length):
    """HKDF expand phase (simplified).

    prk    -- pseudorandom key (bytes) produced by the extract step
    info   -- context label (bytes) mixed into every output block
    length -- number of output bytes wanted; at most 255 * 32 for SHA-256
    Returns `length` bytes of output key material.
    Raises ValueError when `length` exceeds the limit.
    """
    hash_len = 32  # SHA-256
    n = (length + hash_len - 1) // hash_len
    if n > 255:
        # The block counter is encoded as a single byte (bytes([i])).
        raise ValueError("requested length too large for HKDF-SHA256")
    okm = b""
    t = b""
    for i in range(1, n + 1):
        # T(i) = HMAC(prk, T(i-1) || info || i)
        t = hmac.new(prk, t + info + bytes([i]), hashlib.sha256).digest()
        okm += t
    return okm[:length]


# Example: derive encryption and auth keys from master
master = b"master-secret"
# Extract step: HMAC keyed with the salt over the master secret.
prk = hmac.new(b"salt", master, hashlib.sha256).digest()
encryption_key = hkdf_expand(prk, b"encryption", 32)
auth_key = hkdf_expand(prk, b"authentication", 32)
print(f"Encryption key: {encryption_key.hex()}")
print(f"Auth key: {auth_key.hex()}")
EOF
# HASH-BASED COMMITMENT
# Publish a digest now, disclose the value behind it later.
# Commit phase: a random nonce is hashed together with the secret.
SECRET="my-secret-value"
NONCE="$(openssl rand -hex 16)"
COMMITMENT="$(printf '%s:%s' "$NONCE" "$SECRET" | sha256sum | cut -d' ' -f1)"
printf '%s\n' "Commitment: $COMMITMENT" # Share this
# Reveal phase (later): disclose both inputs
printf '%s\n' "Nonce: $NONCE"
printf '%s\n' "Secret: $SECRET"
# Verifier computes: sha256("${NONCE}:${SECRET}") and compares
# HASH CHAINS
# Every link hashes "<previous digest>:<block data>", so each block's hash
# depends on the one before it (blockchain-like).
GENESIS="genesis-block"
PREV="$(printf '%s' "$GENESIS" | sha256sum | cut -d' ' -f1)"
printf '%s\n' "Block 0: $PREV"
for i in {1..5}; do
  DATA="block-$i-data"
  CURRENT="$(printf '%s:%s' "$PREV" "$DATA" | sha256sum | cut -d' ' -f1)"
  printf '%s\n' "Block $i: $CURRENT"
  PREV="$CURRENT"
done
# HASH AS DEDUPLICATION KEY
# Used in backup systems (borg, restic)
# Generate content-addressed name
#
# store_object FILE [ROOT]
#   Copies FILE to ROOT/<first 2 hash chars>/<sha256>[.<ext>] and prints the
#   destination path. ROOT defaults to "objects".
#   The extension is taken from the base name only, and only when the name
#   has the form "<something>.<ext>"; names such as README or .bashrc are
#   stored without a suffix instead of reusing the whole name as extension.
#   Returns non-zero, creating nothing, when FILE cannot be hashed.
store_object() {
  local src=$1 root=${2:-objects} digest name ext dest
  digest=$(sha256sum < "$src") || return
  digest=${digest%% *}
  name=${src##*/}
  ext=
  case "$name" in
    ?*.?*) ext=".${name##*.}" ;;
  esac
  # Store as: objects/ab/c123...def.pdf (first 2 chars as directory)
  dest="$root/${digest:0:2}/${digest}${ext}"
  mkdir -p -- "$root/${digest:0:2}" || return
  cp -- "$src" "$dest" || return
  printf '%s\n' "$dest"
}
FILE="document.pdf"
store_object "$FILE"
Hashing Gotchas
# WRONG: Forgetting -n with echo
echo "password" | sha256sum
# Includes newline! Different hash than "password"
# CORRECT: Use -n
echo -n "password" | sha256sum
# WRONG: Using MD5 for security
md5sum important-file.txt > checksums.md5
# MD5 has known collisions!
# CORRECT: Use SHA-256 minimum
sha256sum important-file.txt > checksums.sha256
# WRONG: Plain hash for passwords
echo -n "password123" | sha256sum
# Rainbow tables exist for common passwords!
# CORRECT: Use password hashing algorithm
mkpasswd -m sha-512 "password123"
# Includes salt, proper algorithm
# WRONG: Comparing secret-dependent digests (HMACs, tokens) with ==
#   [ "$COMPUTED_MAC" == "$RECEIVED_MAC" ]
# Timing attack possible (short-circuits on first difference)
# (`==` inside [ ] is also a bashism; POSIX test only defines `=`.)
# CORRECT: Constant-time comparison (in Python)
#
# ct_equal A B
#   Exit status 0 when strings A and B are equal, 1 otherwise, compared with
#   hmac.compare_digest. Values are handed over via the environment rather
#   than argv so they do not show up in `ps` output.
ct_equal() {
  CT_A=$1 CT_B=$2 python3 - << 'EOF'
import hmac
import os
import sys

same = hmac.compare_digest(os.environb[b"CT_A"], os.environb[b"CT_B"])
sys.exit(0 if same else 1)
EOF
}
# Usage: ct_equal "$COMPUTED_MAC" "$RECEIVED_MAC" && echo "MAC valid"
# Or for bash, just use string comparison (less critical for non-crypto):
#   [ "$(sha256sum file | awk '{print $1}')" = "$EXPECTED" ]
# WRONG: Trusting user-provided hash
USER_HASH="abc123..."
# User could provide hash of malicious file!
# CORRECT: Always compute hash yourself
COMPUTED=$(sha256sum downloaded-file | awk '{print $1}')
[ "$COMPUTED" = "$EXPECTED_FROM_TRUSTED_SOURCE" ]
# WRONG: Hash file then modify
sha256sum config.txt > hashes.txt
# Edit config.txt...
sha256sum -c hashes.txt # Now fails!
# CORRECT: Hash after final version
# Make all changes first, then:
sha256sum config.txt > hashes.txt
# WRONG: Assuming same hash = same file
# Hash collisions exist (especially MD5)
# CORRECT: For critical files, use multiple hashes
sha256sum file.txt
sha512sum file.txt
# Collision in both is astronomically unlikely
# WRONG: Hashing encrypted data for integrity
# Encrypt then hash = attacker can modify ciphertext undetected
# CORRECT: Use authenticated encryption (AEAD)
# Or: HMAC the ciphertext with different key than encryption
# WRONG: Short HMAC key
echo -n "msg" | openssl dgst -sha256 -hmac "key"
# Key shorter than hash output = weaker security
# CORRECT: HMAC key at least as long as hash output
# Generate the key ONCE and keep it (here in a variable; in practice in a
# secret store). A key created inline and thrown away yields a MAC that
# nobody can ever verify.
HMAC_KEY=$(openssl rand -hex 32)
echo -n "msg" | openssl dgst -sha256 -hmac "$HMAC_KEY"
# 32 random bytes = 256 bits, matches SHA-256
# (hex-encoded, so the key string handed to -hmac is 64 characters long)
Quick Reference
# BASIC HASHING
echo -n "text" | sha256sum # String (NO newline!)
sha256sum file.txt # File
sha256sum *.txt > SHA256SUMS # Generate checksums
sha256sum -c SHA256SUMS # Verify checksums
# ALGORITHMS
md5sum file.txt # 128-bit (AVOID)
sha1sum file.txt # 160-bit (legacy)
sha256sum file.txt # 256-bit (recommended)
sha512sum file.txt # 512-bit (highest)
# PASSWORD HASHING
openssl passwd -6 "password" # SHA-512 Linux format
mkpasswd -m sha-512 "password" # Alternative
mkpasswd -m bcrypt "password" # bcrypt (if available)
# HMAC
echo -n "msg" | openssl dgst -sha256 -hmac "key"
# Output: HMAC-SHA256(stdin)= abc123...
# VERIFICATION
sha256sum -c checksums.txt # Check all
sha256sum -c --quiet checksums.txt # Only show failures
sha256sum -c --status checksums.txt # Exit code only
# FILE COMPARISON
cmp -s file1 file2 && echo "Same" # Byte-by-byte (faster)
# Feed the files on stdin so both sides print "<hash>  -". With file name
# arguments the output lines contain the differing names, and diff reports
# a difference even when the contents are identical.
diff <(sha256sum < file1) <(sha256sum < file2) # Hash comparison
# DIRECTORY HASHING
find dir/ -type f -exec sha256sum {} \; | sort | sha256sum
# Single hash for entire directory
# COMMON OUTPUTS
# MD5: 32 hex chars (128 bits)
# SHA-1: 40 hex chars (160 bits)
# SHA-256: 64 hex chars (256 bits)
# SHA-512: 128 hex chars (512 bits)