Drill 03: String Processing
String methods, f-strings for formatting, and regex with the re module.
Run This Drill
bash ~/atelier/_bibliotheca/domus-captures/docs/modules/ROOT/examples/python-drills/03-strings.sh
Drill Script
#!/bin/bash
# Python Drill 03 -- string processing.
# Usage: paste the whole script into a terminal session, or run it with bash.
# Covers: formatting, regular expressions, parsing, and templates.

# Opening banner: rule, title, rule, then one blank spacer line.
printf '%s\n' \
  "==================================================================" \
  " PYTHON DRILL 03: STRING PROCESSING " \
  "==================================================================" \
  ""
# ---------------------------------------------------------------------------
# Drill 3.1 -- built-in str methods: strip/lower/upper, split, join, replace,
# and the startswith/endswith/in tests. The heredoc delimiter is quoted so the
# shell passes the Python source through without expanding anything.
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 3.1: STRING METHODS" \
  "Built-in string manipulation" \
  "------------------------------------------------------------------" \
  ""
python3 << 'PYEOF'
# Whitespace cleanup and case conversion
hostname = " ISE-01.inside.domusdigitalis.dev "
trimmed = hostname.strip()
print(f"Original: '{hostname}'")
print(f"strip(): '{trimmed}'")
print(f"lower(): '{trimmed.lower()}'")
print(f"upper(): '{trimmed.upper()}'")

# Break an FQDN into its dot-separated labels
fqdn = "ise-01.inside.domusdigitalis.dev"
labels = fqdn.split(".")
print()
print(f"split('.'): {labels}")
print(f"First part: {labels[0]}")

# Glue a list back together with a separator
host_names = ["ise-01", "ise-02", "bind-01"]
joined = ",".join(host_names)
print()
print(f"join with ',': {joined}")

# Literal substring replacement
log_line = "[ERROR] Connection failed to 10.50.1.20"
scrubbed = log_line.replace("10.50.1.20", "REDACTED")
print()
print(f"Replaced: {scrubbed}")

# Predicates: prefix, suffix, containment
print()
print(f"startswith('ise'): {fqdn.startswith('ise')}")
print(f"endswith('.dev'): {fqdn.endswith('.dev')}")
print(f"'inside' in fqdn: {'inside' in fqdn}")
PYEOF
printf '\n'
# ---------------------------------------------------------------------------
# Drill 3.2 -- f-strings: interpolation, inline expressions, width/alignment,
# number formatting, and zero padding. The heredoc delimiter is quoted so the
# shell does not touch the braces or backslashes in the Python source.
echo "------------------------------------------------------------------"
echo "DRILL 3.2: F-STRINGS (FORMATTED STRINGS)"
echo "The modern way to format strings"
echo "------------------------------------------------------------------"
echo ""
python3 << 'PYEOF'
hostname = "ise-01"
ip = "10.50.1.20"
port = 443
latency = 45.678

# Basic interpolation
print(f"Server: {hostname} at {ip}:{port}")

# Expressions inside
print(f"Upper: {hostname.upper()}")
print(f"Port + 1000: {port + 1000}")

# Width and alignment: '<' left-aligns, '>' right-aligns, number is the width
print(f"\n{'Host':<15} {'IP':<15} {'Port':>5}")
print(f"{hostname:<15} {ip:<15} {port:>5}")

# Number formatting: fixed decimals, hex with 0x prefix, thousands separators
print(f"\nLatency: {latency:.2f}ms")
print(f"Port hex: {port:#x}")
print(f"With commas: {1234567:,}")

# Padding -- the loop body must be indented or Python rejects the program
for i in range(1, 4):
    print(f"ise-{i:02d}")  # Zero-padded
PYEOF
echo ""
# ---------------------------------------------------------------------------
# Drill 3.3 -- regex fundamentals with the re module: findall() for every
# match, search() for the first match, and how match() differs from search().
printf '%s\n' \
  "------------------------------------------------------------------" \
  "DRILL 3.3: REGEX BASICS" \
  "Pattern matching with re module" \
  "------------------------------------------------------------------" \
  ""
python3 << 'PYEOF'
import re

# Sample input: one "label: value" pair per line
text = """
Server: 10.50.1.20
Gateway: 10.50.1.1
DNS: 192.168.1.100
MAC: AA:BB:CC:DD:EE:FF
"""

# findall() collects every match of the dotted-quad pattern into a list
dotted_quad = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
addresses = re.findall(dotted_quad, text)
print(f"All IPs: {addresses}")

# search() yields a match object for the first hit, or None when there is none
six_octets = r'([0-9A-F]{2}:){5}[0-9A-F]{2}'
mac_hit = re.search(six_octets, text, re.IGNORECASE)
if mac_hit:
    mac_text = mac_hit.group()
else:
    mac_text = 'None'
print(f"MAC found: {mac_text}")

# search() scans the whole string; match() only tries the very beginning
line = "Server: ise-01"
found_anywhere = re.search('ise', line) is not None
ise_at_start = re.match('ise', line) is not None
server_at_start = re.match('Server', line) is not None
print(f"\nre.search('ise', line): {found_anywhere}")
print(f"re.match('ise', line): {ise_at_start}")
print(f"re.match('Server', line): {server_at_start}")
PYEOF
printf '\n'
# ---------------------------------------------------------------------------
# Drill 3.4 -- re.sub() for pattern-based replacement, numbered backreferences
# in the replacement text, and named groups read back from a match object.
echo "------------------------------------------------------------------"
echo "DRILL 3.4: REGEX SUBSTITUTION"
echo "Find and replace with patterns"
echo "------------------------------------------------------------------"
echo ""
python3 << 'PYEOF'
import re

log = "[2024-03-18 10:30:45] ERROR: Connection to 10.50.1.20 failed"

# Simple substitution
redacted = re.sub(r'\d+\.\d+\.\d+\.\d+', 'X.X.X.X', log)
print(f"Redacted: {redacted}")

# With groups (capture and reuse): \1 and \2 refer to the two captures
config = "server=10.50.1.20:443"
parsed = re.sub(r'(\d+\.\d+\.\d+\.\d+):(\d+)', r'host=\1 port=\2', config)
print(f"Parsed: {parsed}")

# Named groups
pattern = r'(?P<host>\d+\.\d+\.\d+\.\d+):(?P<port>\d+)'
match = re.search(pattern, config)
# search() returns None on no match, so guard before calling group()
if match:
    print(f"Host: {match.group('host')}, Port: {match.group('port')}")
PYEOF
echo ""
# ---------------------------------------------------------------------------
# Drill 3.5 -- turn bracketed log lines into dictionaries with one regex, then
# list them and count the ERROR entries.
echo "------------------------------------------------------------------"
echo "DRILL 3.5: PARSING LOG LINES"
echo "Extract structured data from text"
echo "------------------------------------------------------------------"
echo ""
python3 << 'PYEOF'
import re

logs = [
    "[2024-03-18 10:30:45] INFO: Server started on port 8080",
    "[2024-03-18 10:31:02] ERROR: Connection refused to 10.50.1.20",
    "[2024-03-18 10:32:15] WARN: High CPU usage: 89%",
]

# Capture groups, in order: timestamp, level, message
pattern = r'\[(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\] (\w+): (.+)'

parsed = []
for log in logs:
    match = re.match(pattern, log)
    # Lines that do not fit the pattern are skipped rather than raising
    if match:
        timestamp, level, message = match.groups()
        parsed.append({
            "timestamp": timestamp,
            "level": level,
            "message": message
        })

print("=== Parsed logs ===")
for entry in parsed:
    print(f" [{entry['level']}] {entry['message']}")

# Filter by level
errors = [e for e in parsed if e['level'] == 'ERROR']
print(f"\n=== Errors only: {len(errors)} ===")
PYEOF
echo ""
# ---------------------------------------------------------------------------
# Drill 3.6 -- generate config text from templates: str.format(**dict) for an
# SSH config stanza, and a triple-quoted f-string for an nginx upstream block.
echo "------------------------------------------------------------------"
echo "DRILL 3.6: STRING TEMPLATES"
echo "Building config files from templates"
echo "------------------------------------------------------------------"
echo ""
python3 << 'PYEOF'
# Simple template with format: each {name} is filled from a dict key
template = """
Host {hostname}
HostName {ip}
User {user}
Port {port}
"""

servers = [
    {"hostname": "ise-01", "ip": "10.50.1.20", "user": "admin", "port": 22},
    {"hostname": "vault-01", "ip": "10.50.1.132", "user": "vault", "port": 22},
]

print("=== Generated SSH Config ===")
for server in servers:
    print(template.format(**server))


# Multiline with triple-quoted f-string
def generate_nginx_upstream(name, servers):
    """Return an nginx upstream block called `name` with one server line per
    entry of `servers` (strings such as "host:port")."""
    # The join separator carries the same four-space indent that precedes
    # {server_lines} in the template, so every server line lines up.
    server_lines = "\n    ".join(f"server {s};" for s in servers)
    # Doubled braces are literal { and } inside an f-string
    return f"""
upstream {name} {{
    {server_lines}
}}
"""


backends = ["10.50.10.101:80", "10.50.10.102:80"]
print(generate_nginx_upstream("web_backend", backends))
PYEOF
echo ""
# ---------------------------------------------------------------------------
# Closing section: three exercises for the reader to type into Python, then a
# recap of the key points. Everything here is display text only.
echo "------------------------------------------------------------------"
echo "YOUR TURN - TRY THESE:"
echo "------------------------------------------------------------------"
echo ""
echo "1. Extract hostname from FQDN:"
echo " fqdn = 'ise-01.inside.domusdigitalis.dev'"
echo " hostname = fqdn.split('.')[0]"
echo ""
echo "2. Find all MACs in text:"
echo " import re"
echo " text = 'MAC1: AA:BB:CC:DD:EE:FF, MAC2: 11:22:33:44:55:66'"
# The repeated octet group is non-capturing, (?:...): with a capturing group
# re.findall() would return only the group's last capture, not the whole MAC.
echo " re.findall(r'(?:[0-9A-F]{2}:){5}[0-9A-F]{2}', text, re.I)"
echo ""
echo "3. Zero-pad numbers:"
echo " for i in range(1, 6):"
# The print line is indented deeper than the for line so the snippet is valid
# Python when typed as shown.
echo "     print(f'server-{i:03d}')"
echo ""
echo "------------------------------------------------------------------"
echo "KEY TAKEAWAYS:"
echo "1. str.split(), str.join(), str.strip()"
echo "2. f-strings: f'{var}', f'{var:.2f}', f'{var:<10}'"
echo "3. re.findall(), re.search(), re.sub()"
echo "4. Named groups: (?P<name>pattern)"
echo "5. .format(**dict) for templates"
echo "------------------------------------------------------------------"