Phase 2: Image Pipeline

Extract, audit, and manage metadata across photo libraries. Detect duplicates, strip geolocation, and build audit reports.

Metadata Extraction

# Dump every metadata tag exiftool reads from a single file
exiftool photo.jpg

# Limit the output to capture time, GPS latitude/longitude, and camera model
exiftool \
  -DateTimeOriginal \
  -GPSLatitude \
  -GPSLongitude \
  -Model \
  photo.jpg

# Recurse through the directory tree and save the selected tags as CSV
exiftool -r -csv \
  -DateTimeOriginal -GPSPosition -Model -FileSize \
  /path/to/photos \
  > metadata.csv

# Emit JSON for the whole tree, then keep only file/date/gps per record with jq
exiftool -r -json /path/to/photos \
  | jq '.[] | {
      file: .SourceFile,
      date: .DateTimeOriginal,
      gps:  .GPSPosition
    }'

Geolocation Audit

# Find all photos with GPS data.
# Prints Directory/FileName rather than the bare file name: with -r the same
# name can occur in several subdirectories and would otherwise be ambiguous.
exiftool -r -if '$GPSLatitude' -p '$Directory/$FileName — $GPSLatitude, $GPSLongitude' /path/to/photos

# Strip GPS from all photos (privacy).
# -overwrite_original rewrites each file in place without leaving a backup
# copy, so try this on a copy of the library first.
# NOTE(review): -gps:all= targets the GPS group; location tags stored in other
# groups (e.g. XMP) may survive — re-run the audit below to confirm.
exiftool -overwrite_original -gps:all= -r /path/to/photos

# Strip ALL metadata (nuclear option)
exiftool -overwrite_original -all= photo.jpg

# Audit: count photos with/without GPS.
# -p '1' prints one line per file that passes the -if condition; -q silences
# exiftool's informational summary so wc -l counts only those lines.
echo "With GPS:" && exiftool -q -r -if '$GPSLatitude' -p '1' /path/to/photos | wc -l
echo "Without:" && exiftool -q -r -if 'not $GPSLatitude' -p '1' /path/to/photos | wc -l

Duplicate Detection

# Exact duplicates (byte-for-byte), searched recursively
fdupes -r /path/to/photos

# Exact dupes — delete interactively (fdupes asks which copies to keep)
fdupes -rd /path/to/photos

# Perceptual hash dedup (finds visually similar)
# NOTE(review): confirm whether -R/--recurse is needed to descend into
# subdirectories of the given path.
findimagedupes -t 90% /path/to/photos

# Size-based pre-filter: list every .jpg that shares its byte size with at
# least one other file. Only these candidates can be byte-identical, so they
# are the ones worth checksumming afterwards (e.g. with md5sum or fdupes).
#  - size and path are tab-separated and the path is taken as "everything
#    after the first tab", so names containing spaces stay intact
#  - each member of a same-size group is printed exactly once
#  - NR > 1 keeps the first record from matching the still-unset previous size
#    (which would otherwise compare equal to a 0-byte file)
# Requires GNU find for -printf; paths containing newlines are not supported.
find /path/to/photos -type f -name '*.jpg' -printf '%s\t%p\n' | sort -n | \
  awk -F'\t' '{
    path = substr($0, length($1) + 2)
    if (NR > 1 && $1 == prev_size) {
      if (!shown) print prev_path
      print path
      shown = 1
    } else {
      shown = 0
    }
    prev_size = $1
    prev_path = path
  }'

Image Analysis

# Verbose report for one image: dimensions, format, color depth
identify -verbose photo.jpg

# Batch — long, human-readable listing of every regular file over 10M
# (ls is invoked once per match)
find /path/to/photos -size +10M -type f -exec ls -l -h {} ';'

# File type verification: print each path next to the MIME type that file(1)
# detects from the contents, for comparison against the extension
find /path/to/photos -type f \
  -exec sh -c 'echo "$1 → $(file -b --mime-type "$1")"' _ {} ';'

# Steganography check (PNG/BMP)
zsteg image.png

Bulk Operations

# Rename photos by date taken (YYYY-MM-DD_HHMMSS.ext).
# In the -d format, %%-c adds a -N copy number when two photos would get the
# same name and %%e keeps the original extension.
exiftool '-FileName<DateTimeOriginal' -d '%Y-%m-%d_%H%M%S%%-c.%%e' -r /path/to/photos

# Organize into YYYY/MM directories.
# The destination is anchored at the library root: a bare '%Y/%m' is a
# relative path, so the year/month folders would be created under whatever
# directory the command happens to be run from.
exiftool '-Directory<DateTimeOriginal' -d '/path/to/photos/%Y/%m' -r /path/to/photos

# Generate contact sheet: 200x200 thumbnails with 2px spacing, 10 per row
montage /path/to/photos/*.jpg -geometry 200x200+2+2 -tile 10x contact-sheet.jpg