Phase 5: Automation
Phase 5: Automation
Chain tools into reusable pipelines: scheduled audits, batch processing, and forensic-ready scripts.
Pipeline Patterns
# Full document extraction pipeline
# Every *.pdf under /data/documents is handed (NUL-delimited, one per worker
# invocation) to pdftotext, with up to four conversions running at once.
# Each PDF gets a sibling .txt file; "Done: <path>" is printed on success.
find /data/documents -name '*.pdf' -print0 |
  xargs -0 -P4 -I{} \
    sh -c 'pdftotext -layout "$1" "${1%.pdf}.txt" && echo "Done: $1"' _ {}
# Photo audit pipeline — find all geotagged photos, report locations
# -iname matches the extension case-insensitively, so camera-style names such
# as IMG_0001.JPG are audited too (a plain -name '*.jpg' skips them).
# xargs -r avoids invoking exiftool with no file arguments when nothing matches.
# -if '$GPSLatitude' keeps only files that carry a GPS latitude tag; -p prints
# one "directory/file: latitude longitude" line for each of them.
# The report is written to geo-audit-YYYYMMDD.txt in the current directory.
find ~/Photos -type f \( -iname '*.jpg' -o -iname '*.jpeg' \) -print0 |
  xargs -0 -r exiftool -if '$GPSLatitude' \
    -p '$Directory/$FileName: $GPSLatitude $GPSLongitude' \
  > "geo-audit-$(date +%Y%m%d).txt"
# Integrity baseline — hash all critical files
# Records a SHA-256 digest for every regular file under /etc, /usr/local/bin
# and ~/.config. File names travel NUL-delimited so unusual names are safe.
# Output goes to a date-stamped baseline file under ~/forensics.
find /etc /usr/local/bin ~/.config -type f -print0 |
  xargs -0 sha256sum \
  > ~/forensics/baseline-$(date +%Y%m%d).txt
# Diff against previous baseline
# Both baselines are sorted first so that ordering differences do not show up
# as changes. Only diff's "<" (previous only) and ">" (current only) lines are
# kept; awk prints matching lines by default.
diff <(sort baseline-prev.txt) <(sort baseline-current.txt) |
  awk '/^[<>]/'
Scheduled Audits
# Weekly photo metadata audit (cron) — Sundays at 03:00
# Only photos modified since the marker file are exported as CSV and appended
# to ~/forensics/weekly-metadata.csv; the marker is refreshed after a
# successful run.
# The marker is created with an old timestamp (2000-01-01) when it is missing,
# e.g. on the first run or after /tmp has been cleaned. Without this,
# `find -newer` fails on the missing file and the `&& touch` never runs, so the
# audit would never start on its own.
0 3 * * 0 m=/tmp/.last-photo-audit; [ -e "$m" ] || touch -t 200001010000 "$m"; find ~/Photos -newer "$m" -name '*.jpg' -exec exiftool -csv {} + >> ~/forensics/weekly-metadata.csv && touch "$m"
# Daily file integrity check — every day at 02:00
# hashdeep runs in audit mode (-a) against the known-hash list given with -k,
# recursing (-r) through /critical/paths/ and computing SHA-256 (-c sha256).
# stdout and stderr are both appended to a date-stamped log under ~/forensics.
# The % signs in the date format are backslash-escaped because cron treats an
# unescaped % in the command field as a newline.
0 2 * * * hashdeep -r -c sha256 -a -k ~/forensics/baseline.txt /critical/paths/ >> ~/forensics/integrity-$(date +\%Y\%m\%d).log 2>&1
# Monthly disk usage forensics — 04:00 on the 1st of each month
# Lists every regular file as "mtime size path", sorts numerically on the size
# column in descending order and keeps the top 100 in a month-stamped report.
# find's error messages are discarded (2>/dev/null).
# Every % is backslash-escaped, including those in the -printf format: cron
# turns an unescaped % into a newline, which would cut the command off in the
# middle of the quoted format string.
# NOTE(review): the walk starts at / without -xdev, so pseudo-filesystems such
# as /proc are presumably included — confirm that is intended.
0 4 1 * * find / -type f -printf '\%T+ \%s \%p\n' 2>/dev/null | sort -rn -k2 | head -100 > ~/forensics/large-files-$(date +\%Y\%m).txt
Batch Processing Scripts
# OCR all scanned PDFs in a directory
#
# Converts every *.pdf below a directory to a sibling .txt file. pdftotext is
# tried first; when it produces no text, the PDF's images are extracted and
# each one is run through tesseract, with the results concatenated in order.
# PDFs that already have a .txt next to them are skipped.
#
# Arguments: $1 - directory to scan (default: current directory)
# Outputs:   "Processed: <pdf> → <txt>" on stdout for every PDF handled
# Returns:   non-zero if a scratch directory cannot be created
process_scans() {
  local dir="${1:-.}"
  local pdf txt tmpdir page

  # File names arrive NUL-delimited on fd 3, so odd names survive intact and
  # the tools called in the loop cannot swallow the list from stdin.
  while IFS= read -r -d '' pdf <&3; do
    txt="${pdf%.pdf}.txt"
    if [ -f "$txt" ]; then
      continue
    fi

    pdftotext "$pdf" "$txt" 2>/dev/null

    # If pdftotext yields empty, try OCR
    if [ ! -s "$txt" ]; then
      # Private scratch directory instead of fixed, shared names in /tmp.
      tmpdir=$(mktemp -d) || return 1
      pdfimages -png "$pdf" "$tmpdir/ocr-page"
      : > "$txt"
      for page in "$tmpdir"/ocr-page-*.png; do
        [ -e "$page" ] || continue   # glob matched nothing
        # tesseract appends ".txt" to a normal output base, which would put
        # the text in "$txt.txt"; write to stdout and append to $txt instead.
        tesseract "$page" stdout -l eng 2>/dev/null >> "$txt"
      done
      rm -rf -- "$tmpdir"
    fi

    echo "Processed: $pdf → $txt"
  done 3< <(find "$dir" -name '*.pdf' -print0)
}
# Strip metadata from all photos before sharing
#
# Copies a file or directory tree and removes all metadata from the copy
# (exiftool -all=), leaving the source untouched.
#
# Arguments: $1 - source file or directory
#            $2 - destination path
# Outputs:   "Sanitized <n> files" on stdout
# Returns:   2 on missing arguments, 1 if the copy or the metadata strip fails
sanitize_for_sharing() {
  local src="${1:-}" dst="${2:-}"
  local target

  if [ -z "$src" ] || [ -z "$dst" ]; then
    echo "usage: sanitize_for_sharing <src> <dst>" >&2
    return 2
  fi

  # When dst is an existing directory, cp -r places the copy inside it. Only
  # that new copy may be rewritten: running exiftool -r on dst itself would
  # strip metadata from files that were already there.
  target="$dst"
  if [ -d "$dst" ]; then
    target="${dst%/}/$(basename -- "$src")"
  fi

  cp -r -- "$src" "$dst" || return 1
  # Do not report success if the strip itself failed.
  exiftool -overwrite_original -all= -r "$target" || return 1
  echo "Sanitized $(find "$target" -type f | wc -l) files"
}
Evidence Collection Script
# Forensic evidence collection — run on target system
#
# Captures system state, a seven-day file timeline and hashes of the system
# binaries/configuration into ./evidence-<case-id>-<YYYYMMDD-HHMM>/, then packs
# the directory into a .tar.gz with a SHA-256 checksum file beside it.
#
# Arguments: $1 - case identifier (non-empty, must not contain '/')
# Outputs:   "Evidence collected: <archive>" on stdout
# Returns:   2 on an invalid case id, 1 if the output directory, archive or
#            checksum cannot be created
collect_evidence() {
  local case_id="${1:-}"
  local out

  # The id becomes part of a directory name, so it must be usable as one.
  case "$case_id" in
    '' | */*)
      echo "usage: collect_evidence <case-id>  (non-empty, no '/')" >&2
      return 2
      ;;
  esac

  out="evidence-${case_id}-$(date +%Y%m%d-%H%M)"
  if ! mkdir -p -- "$out"; then
    echo "collect_evidence: cannot create $out" >&2
    return 1
  fi

  # System state — each collector is best-effort: a missing tool must not
  # prevent the remaining evidence from being gathered.
  date -u > "$out/collection-time.txt"
  uname -a > "$out/system-info.txt"
  ps auxf > "$out/processes.txt"
  ss -tulnp > "$out/network-connections.txt"
  last -50 > "$out/login-history.txt"
  journalctl --since "7 days ago" > "$out/journal-7d.txt"

  # File timeline (last 7 days), newest first
  find / -type f -mtime -7 -printf '%T+ %u %p\n' 2>/dev/null |
    sort -r > "$out/recent-files.txt"

  # Hash critical directories
  find /etc /usr/bin /usr/sbin -type f -exec sha256sum {} + \
    > "$out/system-hashes.txt" 2>/dev/null

  # Package — only announce success if the archive and its checksum exist.
  tar czf "${out}.tar.gz" "$out" || return 1
  sha256sum "${out}.tar.gz" > "${out}.tar.gz.sha256" || return 1
  echo "Evidence collected: ${out}.tar.gz"
}