File I/O

Infrastructure automation constantly reads and writes files: configs, JSON responses, YAML manifests. This module covers the file operations you will rely on every day.

Reading Files

Basic Reading

# Read entire file
with open("config.txt", "r") as f:
    content = f.read()

# Read lines into list
with open("hosts.txt", "r") as f:
    lines = f.readlines()  # Includes \n
    lines = [line.strip() for line in lines]  # Remove \n

# Read line by line (memory efficient for large files)
with open("large_log.txt", "r") as f:
    for line in f:
        process(line.strip())

# Read specific number of characters
with open("file.txt", "r") as f:
    chunk = f.read(1024)  # Read 1024 chars

Context Manager

Always use a with statement - it ensures the file is closed even if an error occurs:

# Good - automatic cleanup
with open("file.txt", "r") as f:
    data = f.read()
# File automatically closed here

# Bad - manual close, can leak on exception
f = open("file.txt", "r")
data = f.read()
f.close()  # Might not run if exception above

File Modes

# Read modes
"r"   # Read text (default)
"rb"  # Read binary

# Write modes
"w"   # Write (truncates existing)
"wb"  # Write binary
"a"   # Append
"ab"  # Append binary

# Read/write
"r+"  # Read and write (file must exist)
"w+"  # Write and read (truncates)
"a+"  # Append and read

# Encoding
with open("file.txt", "r", encoding="utf-8") as f:
    content = f.read()

Writing Files

Basic Writing

# Write string
with open("output.txt", "w") as f:
    f.write("First line\n")
    f.write("Second line\n")

# Write multiple lines
lines = ["host1", "host2", "host3"]
with open("hosts.txt", "w") as f:
    f.writelines(line + "\n" for line in lines)

# Append to existing file
with open("log.txt", "a") as f:
    f.write(f"[{timestamp}] New entry\n")

# Write with print
with open("output.txt", "w") as f:
    print("Line 1", file=f)
    print("Line 2", file=f)

Safe Writing (Atomic)

import tempfile
import shutil
from pathlib import Path

def safe_write(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` atomically - prevents corruption on crash.

    The text is written to a temporary file created in the same directory as
    ``path`` (so both are on the same filesystem), flushed to disk, and then
    moved over the destination in one step. Readers see either the previous
    file or the complete new one, never a half-written file.

    Args:
        path: Destination file. Its parent directory must already exist.
        content: Text to write.

    Raises:
        OSError: If the temporary file cannot be created or written, or the
            final replace fails. The temporary file is removed before the
            error propagates.
    """
    import os  # local import: only this helper needs it

    temp_path = None
    try:
        # Write to temp file next to the destination
        with tempfile.NamedTemporaryFile(
            mode="w",
            dir=path.parent,
            delete=False,
        ) as f:
            temp_path = Path(f.name)
            f.write(content)
            # Push the data through Python's and the OS's buffers before the
            # rename; otherwise a crash could leave an empty file in place.
            f.flush()
            os.fsync(f.fileno())

        # os.replace overwrites an existing destination on every platform,
        # whereas Path.rename raises FileExistsError on Windows.
        os.replace(temp_path, path)
    except BaseException:
        # Never leave a stray temp file behind when anything goes wrong.
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
        raise

# Usage
safe_write(Path("config.yaml"), yaml_content)

pathlib (Modern Path Handling)

pathlib is the modern, object-oriented way to handle paths.

Basic Operations

from pathlib import Path

# Create path (each example gets its own name so that `path` below still
# refers to /etc/netapi/config.yaml)
path = Path("/etc/netapi/config.yaml")
user_config = Path.home() / ".config" / "netapi" / "config.yaml"
output_dir = Path.cwd() / "output"

# Path components (of /etc/netapi/config.yaml)
path.name        # "config.yaml"
path.stem        # "config"
path.suffix      # ".yaml"
path.parent      # Path("/etc/netapi")
path.parts       # ('/', 'etc', 'netapi', 'config.yaml')

# Absolute/relative
path.is_absolute()     # True
path.resolve()         # Resolve symlinks, make absolute
path.relative_to("/etc")  # Path("netapi/config.yaml"); ValueError if path is not under /etc

File Operations

from pathlib import Path

path = Path("config.yaml")

# Check existence
path.exists()
path.is_file()
path.is_dir()
path.is_symlink()

# Read/write (simple cases)
content = path.read_text()
path.write_text("new content")

data = path.read_bytes()
path.write_bytes(b"binary data")

# File info
path.stat().st_size    # Size in bytes
path.stat().st_mtime   # Modification time
path.owner()           # File owner (Unix)

# Create/delete
path.touch()           # Create empty file
path.unlink()          # Delete file
path.unlink(missing_ok=True)  # Don't error if missing

Directory Operations

from pathlib import Path

dir_path = Path("/etc/netapi")

# Create directory
dir_path.mkdir()
dir_path.mkdir(parents=True, exist_ok=True)  # Like mkdir -p

# Delete directory
dir_path.rmdir()  # Must be empty

# Delete recursively
import shutil
shutil.rmtree(dir_path)

# List contents
for item in dir_path.iterdir():
    print(item.name, "dir" if item.is_dir() else "file")

# Glob patterns
for yaml_file in dir_path.glob("*.yaml"):
    print(yaml_file)

# Recursive glob
for py_file in Path(".").glob("**/*.py"):
    print(py_file)

# rglob (recursive glob shorthand)
for py_file in Path(".").rglob("*.py"):
    print(py_file)

Path Manipulation

from pathlib import Path

# Join paths
config_dir = Path("/etc/netapi")
config_file = config_dir / "config.yaml"

# Change extension
path = Path("config.yaml")
json_path = path.with_suffix(".json")  # config.json

# Change name
backup_path = path.with_name("config.yaml.bak")

# Add to stem
path.with_stem(path.stem + "_backup")  # config_backup.yaml

# Expand user
Path("~/.config/netapi").expanduser()

JSON

Reading JSON

import json
from pathlib import Path

# From string
data = json.loads('{"hostname": "ise-01", "port": 443}')

# From file
with open("config.json", "r") as f:
    data = json.load(f)

# From Path
data = json.loads(Path("config.json").read_text())

# Handle missing file
def load_config(path: Path) -> dict:
    """Return the JSON document stored at ``path``, or ``{}`` on failure.

    A missing file yields an empty dict silently; a file that is not valid
    JSON yields an empty dict after the parse error has been printed.
    """
    # Step 1: fetch the raw text; a missing file simply means "no config".
    try:
        raw_text = path.read_text()
    except FileNotFoundError:
        return {}

    # Step 2: decode it; report malformed JSON and fall back to empty.
    try:
        parsed = json.loads(raw_text)
    except json.JSONDecodeError as e:
        print(f"Invalid JSON: {e}")
        return {}
    return parsed

Writing JSON

import json
from pathlib import Path

config = {
    "hostname": "ise-01",
    "port": 443,
    "endpoints": ["ep1", "ep2"]
}

# To string
json_str = json.dumps(config)

# Pretty print
json_str = json.dumps(config, indent=2)

# To file
with open("config.json", "w") as f:
    json.dump(config, f, indent=2)

# To Path
Path("config.json").write_text(json.dumps(config, indent=2))

# Options
json.dumps(config,
    indent=2,           # Pretty print
    sort_keys=True,     # Sort keys alphabetically
    ensure_ascii=False  # Allow unicode
)

Custom JSON Encoding

import json
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass, asdict

# Custom encoder for non-standard types
class CustomEncoder(json.JSONEncoder):
    """JSON encoder that also understands datetimes, paths and plain objects."""

    def default(self, obj):
        """Convert ``obj`` into something the base encoder can serialize.

        datetimes become ISO 8601 strings, ``Path`` objects become strings,
        and any object with a ``__dict__`` is encoded as that dict. Everything
        else is passed to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, datetime):
            converted = obj.isoformat()
        elif isinstance(obj, Path):
            converted = str(obj)
        elif hasattr(obj, "__dict__"):
            converted = obj.__dict__
        else:
            converted = super().default(obj)
        return converted

# Usage
data = {
    "timestamp": datetime.now(),
    "config_path": Path("/etc/netapi/config.yaml")
}
json.dumps(data, cls=CustomEncoder)

# Dataclass to JSON
@dataclass
class Endpoint:
    """Simple record describing one network endpoint."""
    # MAC address as a string, e.g. "00:11:22:33:44:55"
    mac: str
    # IP address as a string, e.g. "10.50.10.100"
    ip: str
    # VLAN number, e.g. 10
    vlan: int

endpoint = Endpoint("00:11:22:33:44:55", "10.50.10.100", 10)
json.dumps(asdict(endpoint))

YAML

YAML is preferred for configuration files (more readable than JSON).

Reading YAML

import yaml
from pathlib import Path

# From string
data = yaml.safe_load("""
hostname: ise-01
port: 443
endpoints:
  - ep1
  - ep2
""")

# From file
with open("config.yaml", "r") as f:
    data = yaml.safe_load(f)

# From Path
data = yaml.safe_load(Path("config.yaml").read_text())

# Multiple documents
with open("multi.yaml", "r") as f:
    for doc in yaml.safe_load_all(f):
        print(doc)

Writing YAML

import yaml
from pathlib import Path

config = {
    "hostname": "ise-01",
    "port": 443,
    "endpoints": ["ep1", "ep2"]
}

# To string
yaml_str = yaml.dump(config)

# To file
with open("config.yaml", "w") as f:
    yaml.dump(config, f, default_flow_style=False)

# Options
yaml.dump(config,
    default_flow_style=False,  # Block style (readable)
    sort_keys=False,           # Preserve order
    allow_unicode=True,        # Allow unicode
    indent=2                   # Indentation
)

YAML with Custom Types

import yaml
from dataclasses import dataclass, asdict

@dataclass
class ISENode:
    """Record describing one ISE node: its hostname, address and roles."""
    # Node hostname, e.g. "ise-01"
    hostname: str
    # Node IP address as a string, e.g. "10.50.1.20"
    ip: str
    # Role names assigned to the node, e.g. ["PAN"] or ["PSN"]
    roles: list[str]

# Custom representer
def node_representer(dumper, node):
    """Represent a dataclass instance as a plain YAML mapping.

    ``node`` is flattened to a dict of its fields, which ``dumper`` then
    represents like any other dict.
    """
    field_values = asdict(node)
    return dumper.represent_dict(field_values)

yaml.add_representer(ISENode, node_representer)

# Now works
nodes = [
    ISENode("ise-01", "10.50.1.20", ["PAN"]),
    ISENode("ise-02", "10.50.1.21", ["PSN"])
]
yaml.dump({"nodes": nodes})

Configuration Files

INI Files

import configparser
from pathlib import Path

# Read INI
config = configparser.ConfigParser()
config.read("config.ini")

hostname = config["ise"]["hostname"]
port = config.getint("ise", "port")
verify = config.getboolean("ise", "verify_ssl")

# Write INI
config = configparser.ConfigParser()
config["ise"] = {
    "hostname": "ise-01",
    "port": "443",
    "verify_ssl": "true"
}

with open("config.ini", "w") as f:
    config.write(f)

Environment Files (.env)

from pathlib import Path
import os

def load_env(path: Path = Path(".env")) -> dict[str, str]:
    """Load environment variables from file.

    Each useful line has the form ``KEY=value``. Blank lines, lines starting
    with ``#`` and lines without ``=`` are ignored. A leading ``export`` on
    the key (shell style) is dropped, and one pair of matching quotes around
    the value is removed.

    Args:
        path: Location of the env file; defaults to ``.env`` in the current
            working directory.

    Returns:
        Mapping of variable name to value. Empty when the file does not exist.
    """
    env: dict[str, str] = {}

    # Read first and handle absence, rather than exists()-then-read, so a
    # file removed in between cannot cause an unexpected error.
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return env

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        key, sep, value = line.partition("=")
        if not sep:
            continue

        # Accept shell-style "export KEY=value" lines.
        words = key.split(None, 1)
        if len(words) == 2 and words[0] == "export":
            key = words[1]
        key = key.strip()
        if not key:
            # "=value" has no usable name.
            continue

        value = value.strip()
        # Remove exactly one pair of matching quotes; a lone or mismatched
        # quote is part of the value and must be kept.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]

        env[key] = value
    return env

# Load and set
for key, value in load_env().items():
    os.environ.setdefault(key, value)

# Or use python-dotenv
from dotenv import load_dotenv
load_dotenv()

TOML (Python 3.11+)

import tomllib  # Built-in Python 3.11+
from pathlib import Path

# Read TOML
with open("pyproject.toml", "rb") as f:
    data = tomllib.load(f)

# From string
data = tomllib.loads(Path("config.toml").read_text())

# Access nested
project_name = data["project"]["name"]
dependencies = data["project"]["dependencies"]

# Writing TOML requires tomli-w
import tomli_w

with open("config.toml", "wb") as f:
    tomli_w.dump(data, f)

Practical Patterns

Config Manager

from pathlib import Path
import json
import yaml
from dataclasses import dataclass, asdict, field

@dataclass
class Config:
    """Connection settings that can be loaded from and saved to JSON or YAML.

    The file format is chosen from the path's suffix: ``.json`` for JSON and
    ``.yaml``/``.yml`` for YAML, matched case-insensitively.
    """

    # Host to connect to (required)
    hostname: str
    # Port number
    port: int = 443
    # Whether SSL certificates are verified
    verify_ssl: bool = True
    # Timeout value
    timeout: int = 30
    # Endpoint names; a fresh list per instance
    endpoints: list[str] = field(default_factory=list)

    @classmethod
    def load(cls, path: Path) -> "Config":
        """Load config from JSON or YAML.

        Args:
            path: Location of the config file.

        Returns:
            The parsed config. A default config with ``hostname="localhost"``
            is returned when the file does not exist or holds an empty
            document (``null`` in JSON, an empty or comment-only YAML file).

        Raises:
            ValueError: If the suffix is not a supported format (invalid JSON
                also surfaces as a ``ValueError`` subclass).
            TypeError: If the document is not a mapping, or its keys do not
                match the config fields.
        """
        # Read first and handle absence, rather than exists()-then-read, so a
        # file removed in between still yields the default config.
        try:
            content = path.read_text(encoding="utf-8")
        except FileNotFoundError:
            return cls(hostname="localhost")

        suffix = path.suffix.lower()
        if suffix == ".json":
            data = json.loads(content)
        elif suffix in (".yaml", ".yml"):
            data = yaml.safe_load(content)
        else:
            raise ValueError(f"Unknown format: {path.suffix}")

        # An empty YAML document parses to None; treat it like a missing file
        # instead of failing inside cls(**None).
        if data is None:
            return cls(hostname="localhost")
        if not isinstance(data, dict):
            raise TypeError(
                f"Config file must contain a mapping, got {type(data).__name__}"
            )

        return cls(**data)

    def save(self, path: Path) -> None:
        """Save config to JSON or YAML.

        Parent directories are created when missing.

        Args:
            path: Destination file; its suffix selects the format.

        Raises:
            ValueError: If the suffix is not a supported format.
        """
        data = asdict(self)

        suffix = path.suffix.lower()
        if suffix == ".json":
            content = json.dumps(data, indent=2)
        elif suffix in (".yaml", ".yml"):
            content = yaml.dump(data, default_flow_style=False)
        else:
            raise ValueError(f"Unknown format: {path.suffix}")

        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")

# Usage
config = Config.load(Path("~/.config/netapi/config.yaml").expanduser())
config.hostname = "ise-01"
config.save(Path("~/.config/netapi/config.yaml").expanduser())

Log File Parser

from pathlib import Path
from dataclasses import dataclass
from datetime import datetime
import re

@dataclass
class LogEntry:
    """One structured log record: when it was logged, its level and its text."""
    # Time the entry was logged
    timestamp: datetime
    # Level label, e.g. "ERROR"
    level: str
    # Remaining text of the log line
    message: str

def parse_log(path: Path) -> list[LogEntry]:
    """Parse log file into structured entries.

    Lines are expected to look like ``YYYY-MM-DD HH:MM:SS LEVEL message``.
    Lines that do not match that layout, or whose timestamp is not a real
    date/time (e.g. month 13), are skipped.

    Args:
        path: Log file to read.

    Returns:
        The parsed entries in file order.
    """
    pattern = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\s+(\w+)\s+(.+)"
    )
    entries = []

    # Iterate the file object instead of read_text().splitlines() so large
    # logs are processed line by line rather than loaded into memory at once.
    with path.open() as log_file:
        for line in log_file:
            match = pattern.match(line)
            if match is None:
                continue

            # The regex only checks digit counts, so impossible dates still
            # reach strptime; skip them like any other malformed line rather
            # than aborting the whole parse.
            try:
                timestamp = datetime.strptime(match.group(1), "%Y-%m-%d %H:%M:%S")
            except ValueError:
                continue

            entries.append(LogEntry(
                timestamp=timestamp,
                level=match.group(2),
                message=match.group(3)
            ))

    return entries

# Filter errors
errors = [e for e in parse_log(Path("app.log")) if e.level == "ERROR"]

Backup with Rotation

from pathlib import Path
from datetime import datetime
import shutil

def backup_file(path: Path, max_backups: int = 5) -> Path:
    """Create timestamped backup, rotate old backups.

    The copy is stored in a ``backups`` directory next to ``path`` and named
    ``<stem>_<YYYYmmdd_HHMMSS><suffix>``. Afterwards only the newest
    ``max_backups`` backups of this file are kept; backups belonging to
    other files in the same directory are never touched.

    Args:
        path: File to back up.
        max_backups: Number of backups of this file to keep (at least 1).

    Returns:
        Path of the backup that was just created.

    Raises:
        ValueError: If ``max_backups`` is smaller than 1.
        FileNotFoundError: If ``path`` does not exist.
    """
    import re  # local import: only needed for the rotation filter

    # With 0 the fresh backup would be deleted again and the returned path
    # would not exist; negative values make no sense either.
    if max_backups < 1:
        raise ValueError(f"max_backups must be at least 1, got {max_backups}")

    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    # Create backup directory
    backup_dir = path.parent / "backups"
    backup_dir.mkdir(exist_ok=True)

    # Create backup with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = backup_dir / f"{path.stem}_{timestamp}{path.suffix}"
    shutil.copy2(path, backup_path)

    # Rotate old backups. Match the exact "<stem>_<timestamp><suffix>" shape:
    # a loose "<stem>_*" glob would also catch backups of e.g. "config_1.yaml"
    # when rotating "config.yaml", and would misread glob characters in the
    # stem.
    own_backup = re.compile(
        rf"{re.escape(path.stem)}_\d{{8}}_\d{{6}}{re.escape(path.suffix)}"
    )
    backups = sorted(
        candidate
        for candidate in backup_dir.iterdir()
        if own_backup.fullmatch(candidate.name)
    )
    # The timestamp format sorts chronologically, so everything except the
    # last max_backups entries is surplus.
    for oldest in backups[:-max_backups]:
        oldest.unlink()
        print(f"Deleted old backup: {oldest}")

    return backup_path

# Usage
backup_path = backup_file(Path("config.yaml"))
print(f"Backup created: {backup_path}")

Next Module

CLI Development - Click, arguments, options, Rich output.