File I/O
Infrastructure automation constantly reads and writes files: configs, JSON responses, YAML manifests. Master file operations.
Reading Files
Basic Reading
# --- Read an entire file at once ---
with open("config.txt", "r") as f:
    content = f.read()

# --- Read all lines into a list ---
with open("hosts.txt", "r") as f:
    lines = f.readlines()  # each entry keeps its trailing \n
    lines = [line.strip() for line in lines]  # drop the \n

# --- Stream line by line (memory efficient for large files) ---
with open("large_log.txt", "r") as f:
    for line in f:
        process(line.strip())

# --- Read a fixed number of characters ---
with open("file.txt", "r") as f:
    chunk = f.read(1024)  # up to 1024 chars
Context Manager
Always use `with` — it guarantees the file is closed even when an exception occurs:
# Good - the context manager closes the file automatically,
# even if an exception is raised inside the block.
with open("file.txt", "r") as f:
    data = f.read()
# File is closed at this point.

# Bad - manual close can leak the handle: if read() raises,
# close() never runs.
f = open("file.txt", "r")
data = f.read()
f.close()
File Modes
# Read modes
"r"   # text read (the default)
"rb"  # binary read

# Write modes
"w"   # text write - truncates an existing file
"wb"  # binary write
"a"   # append
"ab"  # binary append

# Read/write combinations
"r+"  # read and write; the file must already exist
"w+"  # write and read; truncates first
"a+"  # append and read

# Always pass an explicit encoding for text files
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()
Writing Files
Basic Writing
# Write individual strings
with open("output.txt", "w") as f:
    f.write("First line\n")
    f.write("Second line\n")

# Write a sequence of lines in one call
lines = ["host1", "host2", "host3"]
with open("hosts.txt", "w") as f:
    f.writelines(line + "\n" for line in lines)

# Append to an existing file
with open("log.txt", "a") as f:
    f.write(f"[{timestamp}] New entry\n")

# print() can target a file object directly
with open("output.txt", "w") as f:
    print("Line 1", file=f)
    print("Line 2", file=f)
Safe Writing (Atomic)
import tempfile
import shutil
from pathlib import Path
def safe_write(path: Path, content: str) -> None:
    """Write *content* to *path* atomically - prevents corruption on crash.

    The data is first written to a temporary file in the same directory
    (so both files live on one filesystem), then moved over the target
    in a single rename. A crash mid-write therefore leaves either the
    old file or the complete new one - never a half-written mix.
    """
    # Write to a temp file next to the target
    with tempfile.NamedTemporaryFile(
        mode="w",
        dir=path.parent,
        delete=False
    ) as f:
        f.write(content)
        f.flush()  # push buffered data to the OS before the rename
        temp_path = Path(f.name)
    # Path.replace() atomically overwrites an existing target on both
    # POSIX and Windows; Path.rename() raises FileExistsError on
    # Windows when the destination already exists.
    temp_path.replace(path)
# Usage: atomically replace config.yaml with new YAML content
safe_write(Path("config.yaml"), yaml_content)
pathlib (Modern Path Handling)
pathlib is the modern, object-oriented way to handle paths.
Basic Operations
from pathlib import Path

# Building paths: the / operator joins segments
path = Path("/etc/netapi/config.yaml")
path = Path.home() / ".config" / "netapi" / "config.yaml"
path = Path.cwd() / "output"

# Components of a path
path.name    # "config.yaml"
path.stem    # "config"
path.suffix  # ".yaml"
path.parent  # Path("/etc/netapi")
path.parts   # ('/', 'etc', 'netapi', 'config.yaml')

# Absolute vs. relative
path.is_absolute()        # True
path.resolve()            # resolve symlinks, make absolute
path.relative_to("/etc")  # Path("netapi/config.yaml")
File Operations
from pathlib import Path

path = Path("config.yaml")

# Existence / type checks
path.exists()
path.is_file()
path.is_dir()
path.is_symlink()

# One-shot read/write helpers for simple cases
content = path.read_text()
path.write_text("new content")
data = path.read_bytes()
path.write_bytes(b"binary data")

# Metadata
path.stat().st_size   # size in bytes
path.stat().st_mtime  # modification time
path.owner()          # file owner (Unix only)

# Create / delete
path.touch()                  # create an empty file
path.unlink()                 # delete the file
path.unlink(missing_ok=True)  # no error if already gone
Directory Operations
from pathlib import Path

dir_path = Path("/etc/netapi")

# Create a directory
dir_path.mkdir()
dir_path.mkdir(parents=True, exist_ok=True)  # like mkdir -p

# Remove an empty directory
dir_path.rmdir()

# Remove a whole tree recursively
import shutil
shutil.rmtree(dir_path)

# List direct children
for item in dir_path.iterdir():
    print(item.name, "dir" if item.is_dir() else "file")

# Glob patterns
for yaml_file in dir_path.glob("*.yaml"):
    print(yaml_file)

# Recursive glob
for py_file in Path(".").glob("**/*.py"):
    print(py_file)

# rglob is shorthand for the recursive form
for py_file in Path(".").rglob("*.py"):
    print(py_file)
Path Manipulation
from pathlib import Path

# Joining path segments
config_dir = Path("/etc/netapi")
config_file = config_dir / "config.yaml"

# Swap the extension
path = Path("config.yaml")
json_path = path.with_suffix(".json")  # config.json

# Swap the entire filename
backup_path = path.with_name("config.yaml.bak")

# Modify just the stem
path.with_stem(path.stem + "_backup")  # config_backup.yaml

# Expand ~ to the user's home directory
Path("~/.config/netapi").expanduser()
JSON
Reading JSON
import json
from pathlib import Path
# Parse from a JSON string
data = json.loads('{"hostname": "ise-01", "port": 443}')

# Parse from an open file object
with open("config.json", "r") as f:
    data = json.load(f)

# Via pathlib in one line
data = json.loads(Path("config.json").read_text())
# Handle missing file
def load_config(path: Path) -> dict:
try:
return json.loads(path.read_text())
except FileNotFoundError:
return {}
except json.JSONDecodeError as e:
print(f"Invalid JSON: {e}")
return {}
Writing JSON
import json
from pathlib import Path
config = {
    "hostname": "ise-01",
    "port": 443,
    "endpoints": ["ep1", "ep2"]
}

# Serialize to a string
json_str = json.dumps(config)

# Pretty-printed string
json_str = json.dumps(config, indent=2)

# Straight to an open file
with open("config.json", "w") as f:
    json.dump(config, f, indent=2)

# Via pathlib
Path("config.json").write_text(json.dumps(config, indent=2))

# Common options
json.dumps(config,
    indent=2,           # pretty print
    sort_keys=True,     # alphabetical key order
    ensure_ascii=False  # emit unicode characters as-is
)
Custom JSON Encoding
import json
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass, asdict
# Custom encoder for types the stdlib encoder cannot serialize
class CustomEncoder(json.JSONEncoder):
    """json.JSONEncoder with fallbacks for datetime, Path, and
    arbitrary attribute-bearing objects."""

    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()  # ISO-8601 text
        if isinstance(obj, Path):
            return str(obj)
        if hasattr(obj, "__dict__"):
            return vars(obj)  # plain objects become their attribute dict
        return super().default(obj)
# Usage: dumps() delegates unknown types to CustomEncoder.default
data = {
    "timestamp": datetime.now(),
    "config_path": Path("/etc/netapi/config.yaml")
}
json.dumps(data, cls=CustomEncoder)
# Dataclass to JSON: asdict() converts to a plain dict first
@dataclass
class Endpoint:
    """A single network endpoint record."""

    mac: str
    ip: str
    vlan: int


endpoint = Endpoint("00:11:22:33:44:55", "10.50.10.100", 10)
json.dumps(asdict(endpoint))
YAML
YAML is preferred for configuration files (more readable than JSON).
Reading YAML
import yaml
from pathlib import Path
# From string
# Parse from a string (safe_load refuses arbitrary object tags)
data = yaml.safe_load("""
hostname: ise-01
port: 443
endpoints:
- ep1
- ep2
""")

# Parse from an open file object
with open("config.yaml", "r") as f:
    data = yaml.safe_load(f)

# Via pathlib
data = yaml.safe_load(Path("config.yaml").read_text())

# Multi-document streams (--- separated)
with open("multi.yaml", "r") as f:
    for doc in yaml.safe_load_all(f):
        print(doc)
Writing YAML
import yaml
from pathlib import Path
config = {
    "hostname": "ise-01",
    "port": 443,
    "endpoints": ["ep1", "ep2"]
}

# Serialize to a string
yaml_str = yaml.dump(config)

# Straight to an open file
with open("config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False)

# Common options
yaml.dump(config,
    default_flow_style=False,  # block style (readable)
    sort_keys=False,           # preserve insertion order
    allow_unicode=True,        # emit unicode characters as-is
    indent=2                   # indentation width
)
YAML with Custom Types
import yaml
from dataclasses import dataclass, asdict
@dataclass
class ISENode:
    """One node in an ISE deployment."""

    hostname: str
    ip: str
    roles: list[str]


def node_representer(dumper, node):
    """Teach PyYAML to emit an ISENode as a plain mapping."""
    return dumper.represent_dict(asdict(node))


yaml.add_representer(ISENode, node_representer)

# Now yaml.dump understands ISENode values
nodes = [
    ISENode("ise-01", "10.50.1.20", ["PAN"]),
    ISENode("ise-02", "10.50.1.21", ["PSN"])
]
yaml.dump({"nodes": nodes})
Configuration Files
INI Files
import configparser
from pathlib import Path
# Reading an INI file
config = configparser.ConfigParser()
config.read("config.ini")

hostname = config["ise"]["hostname"]
port = config.getint("ise", "port")              # typed accessor
verify = config.getboolean("ise", "verify_ssl")  # typed accessor

# Writing an INI file (all values must be strings)
config = configparser.ConfigParser()
config["ise"] = {
    "hostname": "ise-01",
    "port": "443",
    "verify_ssl": "true"
}
with open("config.ini", "w") as f:
    config.write(f)
Environment Files (.env)
from pathlib import Path
import os
def load_env(path: Path = Path(".env")) -> dict:
    """Load KEY=VALUE pairs from a .env-style file.

    Blank lines and ``#`` comments are skipped, a shell-style
    ``export `` prefix is tolerated, and surrounding quotes are
    stripped from values. Returns {} when the file does not exist.
    """
    env = {}
    if not path.exists():
        return env
    for line in path.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # Tolerate "export KEY=VALUE" as written by shell dotfiles.
        line = line.removeprefix("export ").lstrip()
        if "=" in line:
            key, value = line.split("=", 1)
            env[key.strip()] = value.strip().strip('"\'')
    return env
# Load the pairs and export them into the process environment
for key, value in load_env().items():
    os.environ.setdefault(key, value)

# Or use the third-party python-dotenv package
from dotenv import load_dotenv
load_dotenv()
TOML (Python 3.11+)
import tomllib # Built-in Python 3.11+
from pathlib import Path
# Reading TOML (binary mode is required by tomllib)
with open("pyproject.toml", "rb") as f:
    data = tomllib.load(f)

# Parse from a string
data = tomllib.loads(Path("config.toml").read_text())

# Access nested tables
project_name = data["project"]["name"]
dependencies = data["project"]["dependencies"]

# Writing TOML needs the third-party tomli-w package
import tomli_w
with open("config.toml", "wb") as f:
    tomli_w.dump(data, f)
Practical Patterns
Config Manager
from pathlib import Path
import json
import yaml
from dataclasses import dataclass, asdict, field
@dataclass
class Config:
    """Connection settings persisted as JSON or YAML."""

    hostname: str
    port: int = 443
    verify_ssl: bool = True
    timeout: int = 30
    endpoints: list[str] = field(default_factory=list)

    @classmethod
    def load(cls, path: Path) -> "Config":
        """Load config from JSON or YAML.

        Returns a default Config when the file is missing or empty.
        Raises ValueError for an unrecognized file extension.
        """
        if not path.exists():
            return cls(hostname="localhost")
        content = path.read_text()
        if path.suffix == ".json":
            data = json.loads(content)
        elif path.suffix in (".yaml", ".yml"):
            data = yaml.safe_load(content)
        else:
            raise ValueError(f"Unknown format: {path.suffix}")
        # yaml.safe_load returns None for an empty document; fall back
        # to the same defaults as a missing file instead of crashing
        # on cls(**None).
        if not data:
            return cls(hostname="localhost")
        return cls(**data)

    def save(self, path: Path) -> None:
        """Serialize to *path*, creating parent directories as needed.

        Raises ValueError for an unrecognized file extension.
        """
        data = asdict(self)
        if path.suffix == ".json":
            content = json.dumps(data, indent=2)
        elif path.suffix in (".yaml", ".yml"):
            content = yaml.dump(data, default_flow_style=False)
        else:
            raise ValueError(f"Unknown format: {path.suffix}")
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content)
# Usage: round-trip a config file under ~/.config
config = Config.load(Path("~/.config/netapi/config.yaml").expanduser())
config.hostname = "ise-01"
config.save(Path("~/.config/netapi/config.yaml").expanduser())
Log File Parser
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime
import re
@dataclass
class LogEntry:
    """One structured line from an application log."""

    timestamp: datetime
    level: str
    message: str


def parse_log(path: Path) -> list[LogEntry]:
    """Parse a log file into LogEntry records, skipping non-matching lines."""
    pattern = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(\w+)\s+(.+)"
    )
    entries: list[LogEntry] = []
    for raw in path.read_text().splitlines():
        match = pattern.match(raw)
        if match is None:
            continue  # not a log record - ignore
        stamp, level, message = match.groups()
        entries.append(
            LogEntry(
                timestamp=datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S"),
                level=level,
                message=message,
            )
        )
    return entries
# Keep only the ERROR-level entries
errors = [e for e in parse_log(Path("app.log")) if e.level == "ERROR"]
Backup with Rotation
from pathlib import Path
from datetime import datetime
import shutil
def backup_file(path: Path, max_backups: int = 5) -> Path:
    """Copy *path* into a sibling ``backups/`` directory under a
    timestamped name, pruning the oldest copies beyond *max_backups*.

    Returns the path of the new backup.
    Raises FileNotFoundError when *path* does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    backup_dir = path.parent / "backups"
    backup_dir.mkdir(exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = backup_dir / f"{path.stem}_{stamp}{path.suffix}"
    shutil.copy2(path, destination)  # copy2 preserves metadata/mtime

    # Timestamped names sort chronologically, so lexical order == age;
    # drop everything beyond the newest max_backups entries.
    history = sorted(backup_dir.glob(f"{path.stem}_*{path.suffix}"))
    for stale in history[: max(0, len(history) - max_backups)]:
        stale.unlink()
        print(f"Deleted old backup: {stale}")

    return destination
# Usage: back up config.yaml and report where the copy landed
backup_path = backup_file(Path("config.yaml"))
print(f"Backup created: {backup_path}")
Next Module
CLI Development - Click, arguments, options, Rich output.