Phase 2: Image Pipeline
Extract, audit, and manage metadata across photo libraries. Detect duplicates, strip geolocation, and build audit reports.
Metadata Extraction
# Full EXIF dump — every tag ExifTool can read from one file
exiftool photo.jpg
# Specific fields — capture time, coordinates and camera model only
exiftool -DateTimeOriginal -GPSLatitude -GPSLongitude -Model photo.jpg
# Recursive — all photos in directory (CSV output, one row per file, saved to metadata.csv)
exiftool -csv -r -DateTimeOriginal -GPSPosition -Model -FileSize /path/to/photos > metadata.csv
# JSON output for pipeline processing.
# Only the tags jq actually reads are requested, so ExifTool does not have to
# extract and serialize every tag of every file. SourceFile is always part of
# each -json record, so the jq result is the same as with a full dump.
exiftool -json -r -DateTimeOriginal -GPSPosition /path/to/photos | \
jq '.[] | {file: .SourceFile, date: .DateTimeOriginal, gps: .GPSPosition}'
Geolocation Audit
# Find all photos with GPS data (prints file name and coordinates per match)
exiftool -r -if '$GPSLatitude' -p '$FileName — $GPSLatitude, $GPSLongitude' /path/to/photos
# Strip GPS from all photos (privacy). Files are rewritten in place.
# -gps:all= clears only the EXIF GPS group; '-gps*=' additionally deletes
# GPS-named tags stored in other groups (e.g. XMP), which would otherwise
# survive and still be reported by the audit commands in this section.
exiftool -overwrite_original -gps:all= '-gps*=' -r /path/to/photos
# Strip ALL metadata (nuclear option)
exiftool -overwrite_original -all= photo.jpg
# Audit: count photos with/without GPS
# Each matching file prints a single "1" line, so wc -l yields the file count.
echo "With GPS:" && exiftool -r -if '$GPSLatitude' -p '1' /path/to/photos | wc -l
echo "Without:" && exiftool -r -if 'not $GPSLatitude' -p '1' /path/to/photos | wc -l
Duplicate Detection
# Exact duplicates (byte-for-byte)
fdupes -r /path/to/photos
# Exact dupes — delete interactively
fdupes -rd /path/to/photos
# Perceptual hash dedup (finds visually similar)
findimagedupes -t 90% /path/to/photos
# Size-based pre-filter then hash.
# 1. find emits "size<TAB>path"; the path is taken as everything after the
#    first tab, so names containing spaces or tabs stay intact.
# 2. awk keeps only files whose size is shared with a neighbour, printing each
#    path exactly once (NR > 1 guards the uninitialised first comparison).
# 3. Only those candidates are hashed; uniq groups identical SHA-256 digests
#    (first 64 chars) and separates groups with a blank line.
# Requires GNU find/xargs/uniq. File names containing newlines are not supported.
find /path/to/photos -type f -name '*.jpg' -printf '%s\t%p\n' | sort -n | \
awk -F'\t' '
  { path = substr($0, length($1) + 2) }
  NR > 1 && $1 == prev { if (!in_group) print held; print path; in_group = 1 }
  NR == 1 || $1 != prev { in_group = 0 }
  { prev = $1; held = path }' | \
xargs -r -d '\n' sha256sum | sort | uniq -w 64 --all-repeated=separate
Image Analysis
# Dimensions, format, color depth
identify -verbose photo.jpg
# Batch — find oversized images (>10MB)
# "+" hands many paths to each ls invocation instead of spawning one ls per
# file; ls then aligns its columns and sorts the listing within each batch.
find /path/to/photos -type f -size +10M -exec ls -lh {} +
# File type verification (extension vs actual format)
# Prints "path → detected MIME type" for every file so the extension can be
# compared against the real content. printf is used instead of echo because
# some sh implementations expand backslash sequences in echo arguments, which
# would garble file names containing backslashes. "+" batches files per shell.
find /path/to/photos -type f -exec sh -c '
  for f do
    printf "%s → %s\n" "$f" "$(file -b --mime-type "$f")"
  done' _ {} +
# Steganography check (PNG/BMP)
zsteg image.png
Bulk Operations
# Rename every photo after its capture timestamp (recursive).
# The percent signs of the %c / %e filename codes are doubled because the
# string is passed through the -d date format.
exiftool -r \
  -d "%Y-%m-%d_%H%M%S%%-c.%%e" \
  "-FileName<DateTimeOriginal" \
  /path/to/photos
# Sort photos into year/month folders derived from the capture date.
# NOTE(review): the %Y/%m target is a relative path — presumably resolved
# against the current working directory rather than the photo root; confirm
# before running on a real library.
exiftool -r \
  -d "%Y/%m" \
  "-Directory<DateTimeOriginal" \
  /path/to/photos
# Build a contact sheet: 200x200 thumbnails with 2px spacing, 10 per row.
montage /path/to/photos/*.jpg \
  -geometry 200x200+2+2 \
  -tile 10x \
  contact-sheet.jpg