Chapter 15: Generating Data

Visualize data. Generate plots for analysis and reporting.

Installing matplotlib

python -m pip install --user matplotlib

Simple Line Plot

import matplotlib.pyplot as plt

# Network latency samples (ms): one latency reading per sample number.
samples = [1, 2, 3, 4, 5]
latency = [12, 15, 11, 18, 14]

# The style must be selected before the figure is created.
plt.style.use('seaborn-v0_8')  # (1)
fig, ax = plt.subplots()

ax.plot(samples, latency, linewidth=2)

ax.set_title("Network Latency Over Time", fontsize=16)
ax.set_xlabel("Sample", fontsize=12)
ax.set_ylabel("Latency (ms)", fontsize=12)

# bbox_inches='tight' trims the extra whitespace around the plot.
plt.savefig('latency.png', bbox_inches='tight')  # (2)
plt.show()
1 Built-in style for cleaner visuals
2 Call savefig() before show() - once show() returns, the figure has been closed, so saving afterwards writes a blank image

Squares and Scatter Plots

import matplotlib.pyplot as plt

# The integers 1 through 1000 and their squares.
x_values = range(1, 1001)
y_values = [x**2 for x in x_values]

plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots()

# Colour each point by its y value so larger squares are drawn darker.
ax.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues, s=10)  # (1)

ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=12)
ax.set_ylabel("Square of Value", fontsize=12)
ax.tick_params(labelsize=10)

# Leave headroom past the largest point (1000, 1_000_000) on both axes.
ax.set_xlim(0, 1100)
ax.set_ylim(0, 1_100_000)

plt.savefig('squares.png', bbox_inches='tight')
1 c = color values, cmap = colormap, s = point size

Colormaps

# Sequential colormaps (data increasing); described from low to high values
plt.cm.Blues    # Light to dark blue
plt.cm.Greens   # Light to dark green
plt.cm.Reds     # Light to dark red
plt.cm.viridis  # Dark purple through blue and green to yellow

# Diverging colormaps (data around center); described from low to high values
plt.cm.RdYlGn   # Red-yellow-green
plt.cm.coolwarm # Cool blue to warm red

Use c= parameter with data values:

ax.scatter(x, y, c=severity, cmap=plt.cm.RdYlGn_r)  # (1)
1 _r suffix reverses colormap

Random Walk Visualization

Simulating system behavior over time:

from random import choice

class RandomWalk:
    """Build the points of a two-dimensional random walk that starts at (0, 0)."""

    def __init__(self, num_points=5000):
        """Store the target point count and start both axes at the origin."""
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Append random steps until the walk holds ``num_points`` points."""
        signs = [1, -1]
        distances = [0, 1, 2, 3, 4]

        while len(self.x_values) < self.num_points:
            # Draw order per iteration: x sign, x distance, y sign, y distance.
            dx = choice(signs) * choice(distances)
            dy = choice(signs) * choice(distances)

            # A step that moves on neither axis is discarded and redrawn,
            # so consecutive points are never identical.
            if dx or dy:
                self.x_values.append(self.x_values[-1] + dx)
                self.y_values.append(self.y_values[-1] + dy)
# Visualize the walk
import matplotlib.pyplot as plt
from random_walk import RandomWalk

while True:
    # Build a fresh 50,000-point walk on every pass through the loop.
    rw = RandomWalk(50_000)
    rw.fill_walk()

    plt.style.use('classic')
    fig, ax = plt.subplots(figsize=(15, 9), dpi=128)

    # Colour each point by its position in the walk, so the Blues colormap
    # draws early points light and late points dark.
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values,
               c=point_numbers, cmap=plt.cm.Blues,
               edgecolors='none', s=1)  # (1)
    ax.set_aspect('equal')

    # Highlight start and end
    ax.scatter(0, 0, c='green', edgecolors='none', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1],
               c='red', edgecolors='none', s=100)

    # Hide both axes
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Save before show() so the written image contains the plot.
    plt.savefig('random_walk.png', bbox_inches='tight')
    plt.show()

    # Any answer other than 'n' generates another walk.
    keep_running = input("Make another walk? (y/n): ")
    if keep_running == 'n':
        break
1 edgecolors='none' removes point borders

Plotly: Interactive Charts

Plotly produces interactive charts that render in a browser, which makes it a good fit for web dashboards.

python -m pip install --user plotly

Basic Die Simulation

from random import randint

class Die:
    """A single die with a configurable number of sides."""

    def __init__(self, num_sides=6):
        """Remember how many sides the die has (six unless told otherwise)."""
        self.num_sides = num_sides

    def roll(self):
        """Return a random integer from 1 through ``num_sides``, inclusive."""
        lowest_face, highest_face = 1, self.num_sides
        return randint(lowest_face, highest_face)
# die_visual.py
import plotly.express as px
from die import Die

# Roll die 1000 times
die = Die()
results = [die.roll() for _ in range(1000)]

# Count how often each possible face (1 through num_sides) came up
poss_results = range(1, die.num_sides + 1)
frequencies = [results.count(value) for value in poss_results]

# Create bar chart; labels maps px.bar's 'x' and 'y' to axis titles
title = "Results of Rolling One D6 1,000 Times"
labels = {'x': 'Result', 'y': 'Frequency of Result'}
fig = px.bar(x=poss_results, y=frequencies, title=title, labels=labels)

fig.update_layout(xaxis_dtick=1)  # (1)

fig.write_html('die_visual.html')  # (2)
fig.show()
1 Show tick for each x value
2 Save as interactive HTML file

Two Dice

import plotly.express as px
from die import Die

# Two dice, each with the default number of sides.
die_1 = Die()
die_2 = Die()

# Record the sum of both dice for each of 50,000 throws.
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Possible sums run from 2 (both dice show 1) up to the two side counts combined.
max_result = die_1.num_sides + die_2.num_sides
poss_results = range(2, max_result + 1)

# One count per possible sum, in the same order as poss_results.
frequencies = list(map(results.count, poss_results))

title = "Results of Rolling Two D6 Dice 50,000 Times"
labels = {'x': 'Result', 'y': 'Frequency'}
fig = px.bar(
    x=poss_results,
    y=frequencies,
    title=title,
    labels=labels,
)

# Put a tick on every possible sum.
fig.update_layout(xaxis_dtick=1)
fig.show()

Different Dice

# D6 + D10: a default die paired with a ten-sided one
die_1 = Die()
die_2 = Die(num_sides=10)

# Sum of both dice for each of 50,000 throws
results = [
    die_1.roll() + die_2.roll()
    for _ in range(50_000)
]

Infrastructure Example: Server Response Times

import matplotlib.pyplot as plt
from random import gauss

# Simulate response times from 3 servers.
# Each entry is (mean, standard deviation) in ms for that server's samples.
server_profiles = {
    'web-01': (50, 10),
    'web-02': (45, 8),
    'web-03': (60, 15),
}
servers = list(server_profiles)
samples = 100

# One list of `samples` Gaussian-distributed response times per server.
data = {
    server: [gauss(mean, stddev) for _ in range(samples)]
    for server, (mean, stddev) in server_profiles.items()
}

plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots()

for server, times in data.items():
    ax.plot(range(samples), times, label=server, alpha=0.7)

ax.set_title("Server Response Times")
ax.set_xlabel("Request")
ax.set_ylabel("Response Time (ms)")

# Draw the threshold line before building the legend: legend() only lists
# labelled artists that already exist when it is called.
ax.axhline(y=100, color='red', linestyle='--', label='SLA Threshold')
ax.legend()

plt.savefig('response_times.png', bbox_inches='tight')
plt.show()

Quick Reference

matplotlib Code

Create figure

fig, ax = plt.subplots()

Line plot

ax.plot(x, y)

Scatter plot

ax.scatter(x, y)

Bar chart

ax.bar(x, heights)

Set title

ax.set_title("Title")

Set labels

ax.set_xlabel("X"), ax.set_ylabel("Y")

Legend

ax.legend()

Save figure

plt.savefig('file.png')

Show figure

plt.show()

Set style

plt.style.use('seaborn-v0_8')

Plotly Code

Bar chart

px.bar(x=x, y=y)

Line chart

px.line(x=x, y=y)

Scatter

px.scatter(x=x, y=y)

Save HTML

fig.write_html('file.html')

Show

fig.show()

Exercises

15-1. Cubes

Plot the first five cubes. Then plot the first 5,000 cubes.

15-2. Colored Cubes

Use colormap to visualize cube values.

15-3. Molecular Motion

Modify random walk with larger step sizes. What patterns emerge?

15-4. Modified Random Walk

Use choice([0, 1, 2, 3, 4, 5, 6, 7, 8]) for larger steps.

15-5. Three Dice

Create D6 + D6 + D6 simulation. What’s the most common sum?

15-6. Multiplication

Create D6 x D6 simulation (multiply instead of add).

Summary

  • matplotlib: plt.subplots() creates figure and axes

  • Plot types: plot() for lines, scatter() for points, bar() for bars

  • Colormaps visualize data progression

  • Random walks simulate stochastic processes

  • Plotly creates interactive HTML charts

  • Save figures before show() to avoid blank images

  • Styles (seaborn-v0_8) improve default aesthetics

Next: Working with external data files.