Chapter 15: Generating Data
Visualize data. Generate plots for analysis and reporting.
Installing matplotlib
python -m pip install --user matplotlib
Simple Line Plot
import matplotlib.pyplot as plt
# Network latency samples (ms)
samples = [1, 2, 3, 4, 5]
latency = [12, 15, 11, 18, 14]

plt.style.use('seaborn-v0_8')  # (1)
fig, ax = plt.subplots()
ax.plot(samples, latency, linewidth=2)

# Title and axis labels
ax.set_title("Network Latency Over Time", fontsize=16)
ax.set_xlabel("Sample", fontsize=12)
ax.set_ylabel("Latency (ms)", fontsize=12)

# Write the image before displaying it; see callout 2
plt.savefig('latency.png', bbox_inches='tight')  # (2)
plt.show()
| 1 | Built-in style for cleaner visuals |
| 2 | Save before show() - show() clears the figure |
Squares and Scatter Plots
import matplotlib.pyplot as plt
# The first 1000 integers and their squares
x_values = range(1, 1001)
y_values = [x**2 for x in x_values]

plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots()
# Color each point by its y value, so larger squares get a darker blue
ax.scatter(x_values, y_values, c=y_values, cmap=plt.cm.Blues, s=10)  # (1)

# Title, axis labels and tick label size
ax.set_title("Square Numbers", fontsize=20)
ax.set_xlabel("Value", fontsize=12)
ax.set_ylabel("Square of Value", fontsize=12)
ax.tick_params(labelsize=10)

# Fix the axis ranges so they extend slightly past the data
ax.set_xlim(0, 1100)
ax.set_ylim(0, 1_100_000)

plt.savefig('squares.png', bbox_inches='tight')
| 1 | c = color values, cmap = colormap, s = point size |
Colormaps
# Sequential colormaps (data increasing)
plt.cm.Blues # Light to dark blue
plt.cm.Greens # Light to dark green
plt.cm.Reds # Light to dark red
plt.cm.viridis # Dark purple through blue-green to yellow
# Diverging colormaps (data around center)
plt.cm.RdYlGn # Red-yellow-green
plt.cm.coolwarm # Cool blue to warm red
Use c= parameter with data values:
ax.scatter(x, y, c=severity, cmap=plt.cm.RdYlGn_r)  # (1)
| 1 | _r suffix reverses colormap |
Random Walk Visualization
Simulating system behavior over time:
from random import choice
class RandomWalk:
    """Generate random walk data.

    Attributes:
        num_points: Total number of points in the walk, origin included.
        x_values: X coordinates of the points so far; starts as [0].
        y_values: Y coordinates of the points so far; starts as [0].
    """

    def __init__(self, num_points=5000):
        """Start a walk of num_points points at the origin (0, 0)."""
        self.num_points = num_points
        self.x_values = [0]
        self.y_values = [0]

    def fill_walk(self):
        """Calculate all points in the walk.

        Appends points until the walk holds num_points points. A step that
        moves in neither direction is discarded and does not count.
        """
        while len(self.x_values) < self.num_points:
            x_step = self._get_step()
            y_step = self._get_step()

            # Skip if no movement
            if x_step == 0 and y_step == 0:
                continue

            # Each new point is the previous point plus the step
            self.x_values.append(self.x_values[-1] + x_step)
            self.y_values.append(self.y_values[-1] + y_step)

    @staticmethod
    def _get_step():
        """Return one signed step: a random direction times a distance of 0-4."""
        direction = choice([1, -1])
        distance = choice([0, 1, 2, 3, 4])
        return direction * distance
# Visualize the walk
import matplotlib.pyplot as plt
from random_walk import RandomWalk
# The style does not change between walks, so apply it once up front
plt.style.use('classic')

while True:
    # Build a fresh 50,000-point walk
    rw = RandomWalk(50_000)
    rw.fill_walk()

    fig, ax = plt.subplots(figsize=(15, 9), dpi=128)

    # Color points by their position in the walk: early = light, late = dark
    point_numbers = range(rw.num_points)
    ax.scatter(rw.x_values, rw.y_values,
               c=point_numbers, cmap=plt.cm.Blues,
               edgecolors='none', s=1)  # (1)
    ax.set_aspect('equal')

    # Highlight start and end
    ax.scatter(0, 0, c='green', edgecolors='none', s=100)
    ax.scatter(rw.x_values[-1], rw.y_values[-1],
               c='red', edgecolors='none', s=100)

    # Hide both axes
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    plt.savefig('random_walk.png', bbox_inches='tight')
    plt.show()

    keep_running = input("Make another walk? (y/n): ")
    # Release this walk's figure so repeated walks do not pile up open figures
    plt.close(fig)
    # Accept 'n', 'N' and answers with stray whitespace
    if keep_running.strip().lower() == 'n':
        break
| 1 | edgecolors='none' removes point borders |
Plotly: Interactive Charts
More interactive than matplotlib. Great for web dashboards.
python -m pip install --user plotly
Basic Die Simulation
from random import randint
class Die:
    """Represents a die."""

    def __init__(self, num_sides=6):
        """Create a die with num_sides faces (six by default).

        Raises:
            ValueError: If num_sides is less than 1.
        """
        # Fail here with a clear message rather than later inside randint()
        if num_sides < 1:
            raise ValueError("num_sides must be at least 1")
        self.num_sides = num_sides

    def roll(self):
        """Return random value between 1 and num_sides."""
        return randint(1, self.num_sides)
# die_visual.py
from collections import Counter

import plotly.express as px

from die import Die

# Roll die 1000 times
die = Die()
results = [die.roll() for _ in range(1000)]

# Count frequency of each result in a single pass;
# a face that never came up is reported as 0
counts = Counter(results)
poss_results = range(1, die.num_sides + 1)
frequencies = [counts[value] for value in poss_results]

# Create bar chart
title = "Results of Rolling One D6 1,000 Times"
labels = {'x': 'Result', 'y': 'Frequency of Result'}
fig = px.bar(x=poss_results, y=frequencies, title=title, labels=labels)
fig.update_layout(xaxis_dtick=1)  # (1)
fig.write_html('die_visual.html')  # (2)
fig.show()
| 1 | Show tick for each x value |
| 2 | Save as interactive HTML file |
Two Dice
from collections import Counter

import plotly.express as px

from die import Die

die_1 = Die()
die_2 = Die()

# Roll both dice 50,000 times and record each sum
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]

# Tally the sums in one pass instead of rescanning all 50,000 results
# for every possible sum; a sum that never occurred is reported as 0
counts = Counter(results)
max_result = die_1.num_sides + die_2.num_sides
poss_results = range(2, max_result + 1)
frequencies = [counts[value] for value in poss_results]

# Create bar chart with a tick for every possible sum
title = "Results of Rolling Two D6 Dice 50,000 Times"
labels = {'x': 'Result', 'y': 'Frequency'}
fig = px.bar(x=poss_results, y=frequencies, title=title, labels=labels)
fig.update_layout(xaxis_dtick=1)
fig.show()
Different Dice
# D6 + D10
die_1 = Die()
die_2 = Die(10)
# Roll both dice 50,000 times and record each sum
results = [die_1.roll() + die_2.roll() for _ in range(50_000)]
Infrastructure Example: Server Response Times
import matplotlib.pyplot as plt
from random import gauss
# Simulate response times from 3 servers
servers = ['web-01', 'web-02', 'web-03']
samples = 100

# Different mean/stddev per server, as (mean, stddev) in ms
profiles = {
    'web-01': (50, 10),
    'web-02': (45, 8),
    'web-03': (60, 15),
}
data = {
    server: [gauss(*profiles[server]) for _ in range(samples)]
    for server in servers
}

plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots()

# One line per server
for server, times in data.items():
    ax.plot(range(samples), times, label=server, alpha=0.7)

# Draw the threshold before building the legend: legend() only lists
# labeled artists that already exist when it is called
ax.axhline(y=100, color='red', linestyle='--', label='SLA Threshold')

ax.set_title("Server Response Times")
ax.set_xlabel("Request")
ax.set_ylabel("Response Time (ms)")
ax.legend()

plt.savefig('response_times.png', bbox_inches='tight')
plt.show()
Quick Reference
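| matplotlib | Code |
|---|---|
| Create figure | `fig, ax = plt.subplots()` |
| Line plot | `ax.plot(x, y)` |
| Scatter plot | `ax.scatter(x, y)` |
| Bar chart | `ax.bar(x, y)` |
| Set title | `ax.set_title("Title")` |
| Set labels | `ax.set_xlabel("X")`, `ax.set_ylabel("Y")` |
| Legend | `ax.legend()` |
| Save figure | `plt.savefig('plot.png', bbox_inches='tight')` |
| Show figure | `plt.show()` |
| Set style | `plt.style.use('seaborn-v0_8')` |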
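| Plotly | Code |
|---|---|
| Bar chart | `px.bar(x=x, y=y)` |
| Line chart | `px.line(x=x, y=y)` |
| Scatter | `px.scatter(x=x, y=y)` |
| Save HTML | `fig.write_html('chart.html')` |
| Show | `fig.show()` |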
Exercises
15-1. Cubes
Plot first five cubes. Then plot first 5000 cubes.
15-2. Colored Cubes
Use colormap to visualize cube values.
15-3. Molecular Motion
Modify random walk with larger step sizes. What patterns emerge?
15-4. Modified Random Walk
Use choice([0, 1, 2, 3, 4, 5, 6, 7, 8]) for larger steps.
15-5. Three Dice
Create D6 + D6 + D6 simulation. What’s the most common sum?
15-6. Multiplication
Create D6 x D6 simulation (multiply instead of add).
Summary
- matplotlib: `plt.subplots()` creates figure and axes
- Plot types: `plot()` for lines, `scatter()` for points, `bar()` for bars
- Colormaps visualize data progression
- Random walks simulate stochastic processes
- Plotly creates interactive HTML charts
- Save figures before `show()` to avoid blank images
- Styles (`seaborn-v0_8`) improve default aesthetics
Next: Working with external data files.