Grep Mastery

Overview

grep (Global Regular Expression Print) searches for patterns in files or input streams. Essential for log analysis, code searching, and text filtering.

Basic Syntax

grep [options] pattern [file...]
grep [options] -e pattern1 -e pattern2 [file...]
grep [options] -f pattern_file [file...]

Essential Options

Option Description Example

-i

Case insensitive

grep -i error log

-v

Invert match (NOT)

grep -v DEBUG log

-w

Whole word match

grep -w admin users

-x

Whole line match

grep -x "exact line"

-c

Count matching lines only

grep -c error log

-l

Files with matches

grep -l error *.log

-L

Files without matches

grep -L error *.log

-n

Show line numbers

grep -n error log

-r

Recursive search

grep -r pattern dir/

-R

Recursive + follow symlinks

grep -R pattern dir/

-h

Hide filename

grep -h error *.log

-H

Show filename (always)

grep -H error log

-o

Only matching part

grep -o 'IP=[0-9.]*'

-q

Quiet (exit code only)

grep -q error log

-s

Suppress errors

grep -s error file

-m N

Stop after N matches

grep -m 1 error log

-A N

N lines after match

grep -A 3 error log

-B N

N lines before match

grep -B 3 error log

-C N

N lines context (both)

grep -C 3 error log

--color

Highlight matches

grep --color error

Pattern Types

Basic Regex (BRE) - Default

# Literal match
grep 'error' file

# Any single character
grep 'err.r' file          # err + any char + r

# Character class
grep '[Ee]rror' file       # Error or error

# Negated class
grep '[^0-9]' file         # Non-digits

# Beginning of line
grep '^Error' file         # Lines starting with Error

# End of line
grep 'error$' file         # Lines ending with error

# Zero or more
grep 'err*' file           # er, err, errr, etc.

# BRE escapes for special meaning
grep 'error\|warning' file # BRE requires backslash for OR
grep '\(group\)' file      # BRE requires backslash for groups

Extended Regex (ERE) - with -E

# OR operator
grep -E 'error|warning' file

# One or more
grep -E 'err+' file        # err, errr... ("er" followed by one or more r)

# Zero or one
grep -E 'errors?' file     # error or errors

# Exact count
grep -E '[0-9]{3}' file    # 3 consecutive digits (longer runs also match)

# Range count
grep -E '[0-9]{1,3}' file  # Runs of 1 to 3 digits (unanchored: any line with a digit matches)

# Groups
grep -E '(error|warn)ing' file

# Word boundary
grep -E '\berror\b' file   # Whole word "error"

Perl Regex (PCRE) - with -P

# Lookahead (match if followed by)
grep -P 'error(?=\s+critical)' file

# Negative lookahead
grep -P 'error(?!\s+critical)' file

# Lookbehind (match if preceded by)
grep -P '(?<=ERROR:\s)\d+' file

# Non-greedy matching
grep -Po '"[^"]*?"' file

# Named groups
grep -P '(?<ip>\d+\.\d+\.\d+\.\d+)' file

# Extract with \K (reset match start)
grep -Po 'user=\K[^\s]+' file    # Print only what follows user=

# Unicode classes
grep -P '\pL+' file              # Letters

# Word boundaries
grep -P '\berror\b' file

Common Patterns

IP Addresses

# Simple IP match (may match invalid IPs)
grep -E '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' file

# Extract IPs only
grep -oE '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' file

# Strict IP validation (0-255)
grep -P '\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b' file

MAC Addresses

# Colon format (AA:BB:CC:DD:EE:FF)
grep -Ei '([0-9a-f]{2}:){5}[0-9a-f]{2}' file

# Dash format (AA-BB-CC-DD-EE-FF)
grep -Ei '([0-9a-f]{2}-){5}[0-9a-f]{2}' file

# Cisco format (AABB.CCDD.EEFF)
grep -Ei '[0-9a-f]{4}\.[0-9a-f]{4}\.[0-9a-f]{4}' file

Email Addresses

grep -E '[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}' file

URLs

# Inside [...] in BRE/ERE, \s is a literal backslash and "s"; use [[:space:]] instead
grep -E 'https?://[^[:space:]]+' file
grep -oP 'https?://[^\s<>"]+' file

Timestamps

# ISO 8601 (2026-02-13)
grep -E '[0-9]{4}-[0-9]{2}-[0-9]{2}' file

# Time (HH:MM:SS)
grep -E '[0-9]{2}:[0-9]{2}:[0-9]{2}' file

# Syslog timestamp
grep -E '^[A-Z][a-z]{2} [0-9 ][0-9] [0-9]{2}:[0-9]{2}:[0-9]{2}' file

Log Analysis

Error Filtering

# Find errors (case insensitive)
grep -i error /var/log/syslog

# Find errors but not warnings
grep -i error log | grep -iv warning

# Multiple error types
grep -Ei 'error|fail|fatal|critical' log

# Exclude noise
grep -i error log | grep -v 'expected error'

# Count errors per file
grep -c -i error *.log

# Errors with context
grep -B 2 -A 5 'FATAL' log

Authentication Logs

# Failed logins
grep -i 'failed\|failure' /var/log/auth.log

# Successful logins
grep 'Accepted' /var/log/auth.log

# SSH connections
grep 'sshd' /var/log/auth.log

# Sudo usage
grep 'sudo' /var/log/auth.log

# Failed SSH per IP
grep 'Failed password' /var/log/auth.log | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' | sort | uniq -c | sort -rn

Web Server Logs

# 404 errors
grep '" 404 ' access.log

# 5xx errors
grep -E '" 5[0-9]{2} ' access.log

# Specific IP
grep '^192\.168\.1\.100' access.log

# POST requests
grep '"POST' access.log

# Requests to specific path
grep 'GET /api/' access.log

# Large responses (> 1MB)
awk '$10 > 1000000' access.log

Combining with Other Tools

With find

# Search in files found by find
find /var/log -name "*.log" -exec grep -l 'error' {} \;

# Using xargs (faster for many files)
find /var/log -name "*.log" | xargs grep -l 'error'

# Handle spaces in filenames
find /var/log -name "*.log" -print0 | xargs -0 grep -l 'error'

With awk

# Filter then process
grep 'ERROR' log | awk '{print $1, $5}'

# Count by category
grep 'ERROR' log | awk '{count[$3]++} END {for (k in count) print k, count[k]}'

With sed

# Find and modify
grep -l 'old_value' *.conf | xargs sed -i 's/old_value/new_value/g'

# Extract and transform
grep -o 'IP=[0-9.]*' log | sed 's/IP=//'

With sort/uniq

# Count unique occurrences
grep -o 'ERROR:[^,]*' log | sort | uniq -c | sort -rn

# Unique IPs
grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' log | sort -u

Performance Tips

Speed Optimizations

# Use fixed strings when possible (faster)
grep -F 'literal string' file

# Limit to first match
grep -m 1 'pattern' file

# Use ripgrep for large searches
rg 'pattern' directory/

# Ignore binary files
grep -I 'pattern' *

# Exclude directories
grep -r --exclude-dir={.git,node_modules} 'pattern' .

# Include only specific files
grep -r --include='*.py' 'pattern' .

Large File Handling

# Search only the first or last 10,000 lines
head -10000 largefile | grep 'pattern'
tail -10000 largefile | grep 'pattern'

# Use parallel processing
cat largefile | parallel --pipe grep 'pattern'

Exit Codes

Code Meaning

0

Match found

1

No match found

2

Error occurred

Using Exit Codes

# Conditional execution
grep -q 'error' log && echo "Errors found!"

# In scripts
if grep -q 'pattern' file; then
    echo "Found"
else
    echo "Not found"
fi

# Run a fallback when nothing matches (also fires when grep itself fails, exit code 2)
grep -q 'error' log || echo "No errors"

ripgrep (rg) Alternative

ripgrep is a faster, more modern alternative:

# Basic search (auto-recursive)
rg 'pattern' directory/

# Ignore case
rg -i 'pattern'

# Word boundary
rg -w 'pattern'

# Show context
rg -C 3 'pattern'

# File type filter
rg -t py 'pattern'

# Exclude patterns
rg --glob '!*.min.js' 'pattern'

# Fixed string
rg -F 'literal string'

# Count matching lines per file
rg -c 'pattern'

Quick Reference

# Basic patterns
grep 'pattern' file              # Find pattern
grep -i 'pattern' file           # Case insensitive
grep -v 'pattern' file           # Invert (NOT)
grep -w 'word' file              # Whole word

# Context
grep -n 'pattern' file           # Line numbers
grep -C 3 'pattern' file         # 3 lines context
grep -A 5 'pattern' file         # 5 lines after
grep -B 2 'pattern' file         # 2 lines before

# Multiple patterns
grep -E 'pat1|pat2' file         # OR patterns
grep -e 'pat1' -e 'pat2' file    # Multiple -e

# Files and directories
grep -r 'pattern' dir/           # Recursive
grep -l 'pattern' *.log          # Files with matches
grep -L 'pattern' *.log          # Files without matches

# Extraction
grep -o 'pattern' file           # Only matching part
grep -c 'pattern' file           # Count matching lines

# Performance
grep -F 'fixed' file             # Fixed string (fast)
grep -m 1 'pattern' file         # Stop at first match

CTF and Security Patterns

Flag Extraction

# Common CTF flag formats
grep -roE 'flag\{[^}]+\}' .
grep -roE 'CTF\{[^}]+\}' .
grep -roE 'picoCTF\{[^}]+\}' .

# Generic flag pattern (brace-enclosed)
grep -oE '[A-Za-z]+\{[A-Za-z0-9_-]+\}' file

# Find flags in binary
strings binary | grep -E 'flag|ctf|key'

# Hidden in hex (use a plain dump: default xxd output puts a space between byte pairs)
xxd -p file | tr -d '\n' | grep -oE '666c6167|464c4147'  # "flag" or "FLAG" in hex

# Base64 encoded flags
grep -oE '[A-Za-z0-9+/]{20,}={0,2}' file | while read b64; do
    echo "$b64" | base64 -d 2>/dev/null | grep -q 'flag' && echo "FOUND: $b64"
done

Credential Hunting

# Password patterns
grep -rE 'password\s*[:=]\s*' .
grep -rE 'passwd\s*[:=]\s*' .
grep -rE 'pwd\s*[:=]\s*' .
grep -ri 'secret\|token\|apikey\|api_key' .

# AWS credentials
grep -rE 'AKIA[0-9A-Z]{16}' .                    # AWS Access Key ID
grep -rE '[0-9a-zA-Z/+]{40}' . | grep -v '\.git' # AWS Secret Key (40 char base64)

# Private keys
grep -rl 'BEGIN.*PRIVATE KEY' .
grep -rl 'BEGIN RSA PRIVATE KEY' .

# Database connection strings
grep -rE 'mysql://|postgres://|mongodb://|redis://' .
grep -rE 'jdbc:[a-z]+://' .

# JWT tokens
grep -oE 'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*' file

# SSH keys
grep -rE 'ssh-(rsa|ed25519|ecdsa)' .

# HTTP Basic Auth (base64 encoded)
grep -oE 'Basic [A-Za-z0-9+/=]+' file | while read auth; do
    echo "$auth" | cut -d' ' -f2 | base64 -d 2>/dev/null
done

Binary Analysis

# Extract strings from binary
strings -n 10 binary | grep -i 'password\|secret\|flag'

# Find embedded URLs
strings binary | grep -oE 'https?://[^[:space:]<>"]+' | sort -u

# Extract email addresses
strings binary | grep -oE '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

# Find file paths
strings binary | grep -E '^/[a-zA-Z0-9_/.-]+' | sort -u
strings binary | grep -E '^C:\\' | sort -u  # Windows paths

# Search for magic bytes (file signatures); -p gives a plain hex dump without spacing
xxd -p binary | tr -d '\n' | grep -oE '504b0304|89504e47|ffd8ffe0|25504446'  # ZIP, PNG, JPEG, PDF

# Find function names
strings -n 5 binary | grep -E '^_?[a-z][a-z_0-9]*$' | sort -u

Network Forensics

# Extract IPs from file
grep -oE '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}' file | sort -u

# Extract IPs with port
grep -oE '[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]+' file

# IPv6 addresses
grep -oE '([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}' file

# Extract domains
grep -oE '[a-zA-Z0-9][-a-zA-Z0-9]*\.[a-zA-Z]{2,}' file | sort -u

# Extract User-Agents
grep -oP 'User-Agent: \K.*' file

# HTTP headers
grep -E '^(GET|POST|PUT|DELETE|HEAD|OPTIONS|PATCH)' file
grep -E '^HTTP/[0-9.]+ [0-9]+' file

# DNS queries in logs
grep -oE 'query\[[A-Z]+\] [^ ]+' file

Memory Dump Analysis

# Credit card numbers (simple pattern)
grep -oE '\b[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}[- ]?[0-9]{4}\b' memdump

# Social security numbers
grep -oE '\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b' memdump

# Phone numbers
grep -oE '\b\(?[0-9]{3}\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4}\b' memdump

# Process paths
strings memdump | grep -E '/proc/[0-9]+/' | head -50

# Environment variables
strings memdump | grep -E '^[A-Z_]+=.+' | head -50

# Interesting strings with context
strings -n 8 memdump | grep -B 2 -A 2 'password'

Encoded Data Extraction

# Base64 extraction
grep -oE '[A-Za-z0-9+/]{20,}={0,2}' file

# Hex strings (even length, 20+ hex chars)
grep -oE '\b([0-9a-fA-F]{2}){10,}\b' file

# URL-encoded data
grep -oE '%[0-9A-Fa-f]{2}' file | tr -d '\n' | python3 -c 'import sys,urllib.parse; print(urllib.parse.unquote(sys.stdin.read()))'

# Find rot13
grep -oE '[A-Za-z]+' file | while read word; do
    echo "$word" | tr 'A-Za-z' 'N-ZA-Mn-za-m' | grep -q "flag\|secret\|key" && echo "ROT13: $word"
done

# Unicode escape sequences
grep -oE '\\u[0-9a-fA-F]{4}' file

Hash Detection

# MD5 (32 hex chars)
grep -oE '\b[a-fA-F0-9]{32}\b' file

# SHA1 (40 hex chars)
grep -oE '\b[a-fA-F0-9]{40}\b' file

# SHA256 (64 hex chars)
grep -oE '\b[a-fA-F0-9]{64}\b' file

# SHA512 (128 hex chars)
grep -oE '\b[a-fA-F0-9]{128}\b' file

# Bcrypt hashes
grep -oE '\$2[aby]?\$[0-9]+\$[A-Za-z0-9./]{53}' file

# Linux password hashes (shadow file format)
grep -oE '\$[0-9a]\$[^:]+' file

Data Extraction Patterns

Key-Value Extraction

# Extract value from key=value
grep -oP 'username=\K[^\s&]+' file
grep -oP 'password=\K[^\s&]+' file

# JSON field extraction (simple)
grep -oP '"api_key":\s*"\K[^"]+' file
grep -oP '"token":\s*"\K[^"]+' file

# Config file values
grep -oP '^DB_HOST=\K.*' .env
grep -oP 'host:\s*\K\S+' config.yaml

# XML attribute extraction
grep -oP 'id="\K[^"]+' file.xml
grep -oP '<password>\K[^<]+' file.xml

Structured Data

# Extract between delimiters
grep -oP '<<<\K[^>]+(?=>>>)' file
grep -oP '\[\[\K[^\]]+(?=\]\])' file

# Extract SQL queries
grep -oP "(?i)SELECT\s+.*?\s+FROM\s+\w+" file
grep -oP "(?i)INSERT\s+INTO\s+\w+" file

# Extract function calls
grep -oP '\w+\s*\([^)]*\)' file

# Extract imports/includes
grep -oE '^(import|from|include|require).*' file
grep -oE '#include\s*[<"][^">]+[">]' file.c

Log Parsing

# Syslog extraction
grep -oP '^\w+\s+\d+\s+\d+:\d+:\d+\s+\K\S+' /var/log/syslog  # hostname
grep -oP '^\w+\s+\d+\s+\d+:\d+:\d+\s+\S+\s+\K\S+' /var/log/syslog  # process

# Apache Combined Log Format fields
grep -oP '^\S+' access.log           # IP
grep -oP '"GET \K[^"]+' access.log   # Request path
grep -oP '" \K[0-9]+' access.log     # Status code

# Extract time ranges
grep -E '2026-02-13 1[0-4]:' file    # 10:00-14:59
grep -E '(Jan|Feb|Mar) (1[0-9]|2[0-9])' file  # Days 10-29

# Stack trace extraction
grep -A 20 'Exception\|Error' file | head -40
grep -B 5 -A 10 'FATAL' file

Advanced PCRE Patterns

Lookahead and Lookbehind

# Match word followed by specific word
grep -P 'user(?=\s+admin)' file          # "user" followed by " admin"

# Match word NOT followed by specific word
grep -P 'error(?!\s+expected)' file      # "error" not followed by " expected"

# Match word preceded by specific word
grep -P '(?<=password:\s)\S+' file       # Word after "password: "

# Match word NOT preceded by specific word
grep -P '(?<!test_)data' file            # "data" not preceded by "test_"

# Extract content between markers
grep -oP '(?<=\[START\]).*?(?=\[END\])' file

# Multiple conditions
grep -P '(?=.*user)(?=.*admin)' file     # Lines with both "user" AND "admin"

Non-Greedy Matching

# Greedy (default) - matches longest
grep -oP '".*"' file      # Matches from first " to last "

# Non-greedy - matches shortest
grep -oP '".*?"' file     # Matches individual quoted strings

# Practical: extract every "name" key/value pair from JSON
grep -oP '"name":\s*"[^"]*?"' file

# Extract HTML tags
grep -oP '<[^>]+?>' file

Backreferences

# Repeated words
grep -P '\b(\w+)\s+\1\b' file    # "the the", "is is"

# Matching quotes
grep -P '(["\x27]).*?\1' file    # Matches "text" or 'text'

# XML tag matching
grep -P '<(\w+)>.*?</\1>' file   # Matches <tag>content</tag>

Process Substitution with Grep

# Compare two filtered outputs
diff <(grep pattern file1 | sort) <(grep pattern file2 | sort)

# Search in command output
grep 'ERROR' <(dmesg)

# Search in multiple compressed files
zgrep 'pattern' file1.gz file2.gz

# Search with timeout
timeout 5 grep -r 'pattern' /large/directory

# Grep with progress for large searches
pv largefile | grep 'pattern'

Parallel Grep

# Using GNU parallel
find . -name "*.log" | parallel -j 8 grep -l 'pattern' {}

# Using xargs with parallelism
find . -name "*.log" -print0 | xargs -0 -P 4 grep -l 'pattern'

# ripgrep (inherently parallel)
rg -j 8 'pattern' directory/

# Split large file for parallel processing
split -n l/4 largefile chunk_    # l/4: four chunks, never splitting a line
parallel -k grep 'pattern' ::: chunk_* > combined_results    # -k keeps chunk order
rm chunk_*