Chapter 17: Working with APIs

APIs provide live data. Query web services programmatically.

Installing requests

python -m pip install --user requests

Making API Requests

GitHub API Example

import requests

url = "https://api.github.com/search/repositories"
params = {
    "q": "language:python stars:>10000",
    "sort": "stars"
}

response = requests.get(url, params=params)  (1)

print(f"Status code: {response.status_code}")  (2)

data = response.json()  (3)
print(f"Total repositories: {data['total_count']}")
1 GET request with query parameters
2 A status code of 200 indicates a successful request
3 Parse JSON response

Examining the Response

import requests

url = "https://api.github.com/search/repositories"
params = {"q": "language:python stars:>10000", "sort": "stars"}
response = requests.get(url, params=params)
data = response.json()

# Top-level keys
print(data.keys())
# dict_keys(['total_count', 'incomplete_results', 'items'])

# Items is the list of repositories
repos = data['items']
print(f"Repositories returned: {len(repos)}")

# First repository
repo = repos[0]
print(repo.keys())

# Key fields
print(f"Name: {repo['name']}")
print(f"Owner: {repo['owner']['login']}")
print(f"Stars: {repo['stargazers_count']}")
print(f"URL: {repo['html_url']}")
print(f"Description: {repo['description']}")

Processing API Data

import requests

url = "https://api.github.com/search/repositories"
params = {"q": "language:python stars:>10000", "sort": "stars"}
response = requests.get(url, params=params)

if response.status_code != 200:
    print(f"Error: {response.status_code}")
else:
    data = response.json()
    repos = data['items']

    # Extract data
    names, stars, labels = [], [], []
    for repo in repos:
        names.append(repo['name'])
        stars.append(repo['stargazers_count'])

        # Handle missing descriptions
        label = repo['description'] or ""
        if len(label) > 50:
            label = label[:50] + "..."
        labels.append(label)

Visualizing API Data

import requests
import plotly.express as px

# Fetch data
url = "https://api.github.com/search/repositories"
params = {"q": "language:python stars:>10000", "sort": "stars"}
response = requests.get(url, params=params)
data = response.json()

repos = data['items']
repo_links, stars, hover_texts = [], [], []

for repo in repos:
    name = repo['name']
    url = repo['html_url']
    repo_links.append(f"<a href='{url}'>{name}</a>")  (1)
    stars.append(repo['stargazers_count'])

    owner = repo['owner']['login']
    description = repo['description'] or "No description"
    hover_texts.append(f"{owner}<br />{description}")

# Create chart
title = "Most Starred Python Projects on GitHub"
labels = {'x': 'Repository', 'y': 'Stars'}
fig = px.bar(x=repo_links, y=stars, title=title, labels=labels,
             hover_name=hover_texts)

fig.update_layout(
    xaxis_title="Repository",
    yaxis_title="Stars",
    title_font_size=24
)
fig.update_traces(marker_color='SteelBlue')

fig.write_html('python_repos.html')
fig.show()
1 HTML links make bars clickable

Customizing Plotly Charts

fig.update_layout(
    title_font_size=28,
    xaxis_title_font_size=20,
    yaxis_title_font_size=20,
)

fig.update_traces(
    marker_color='SteelBlue',
    marker_opacity=0.8,
)

# Custom colorscale based on data
fig = px.bar(x=names, y=stars, color=stars,
             color_continuous_scale='Viridis')

Working with Headers

Many APIs require authentication or custom headers:

import requests

url = "https://api.example.com/data"
headers = {
    "Authorization": "Bearer YOUR_TOKEN",
    "Accept": "application/json",
    "User-Agent": "MyApp/1.0"
}

response = requests.get(url, headers=headers)

Rate Limiting

Most APIs cap how many requests you can make in a given time window. Check the response headers to see your current limits:

response = requests.get(url)

# GitHub rate limit info
print(response.headers.get('X-RateLimit-Limit'))      # Max requests
print(response.headers.get('X-RateLimit-Remaining'))  # Requests left
print(response.headers.get('X-RateLimit-Reset'))      # Unix timestamp

Handle rate limits:

import time
import requests

def make_request(url):
    response = requests.get(url)

    if response.status_code == 429:  (1)
        retry_after = int(response.headers.get('Retry-After', 60))
        print(f"Rate limited. Waiting {retry_after} seconds...")
        time.sleep(retry_after)
        return make_request(url)

    return response
1 429 = Too Many Requests

Error Handling

import requests

# Robust request pattern: bounded wait, explicit HTTP-error check,
# and a specific handler for each failure mode.
try:
    response = requests.get(url, timeout=10)  (1)
    response.raise_for_status()  (2)
    data = response.json()
except requests.exceptions.Timeout:
    print("Request timed out")
except requests.exceptions.HTTPError as e:
    print(f"HTTP error: {e}")
except requests.exceptions.JSONDecodeError:
    # Must come before RequestException: requests' JSONDecodeError is a
    # subclass of it, so the broader clause would otherwise swallow it.
    # (The original `json.JSONDecodeError` would also raise NameError,
    # since `json` was never imported.)
    print("Invalid JSON response")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {e}")
1 10-second timeout, so the request fails fast instead of hanging indefinitely
2 Raises exception for 4xx/5xx status codes

Infrastructure Example: Monitoring API

import requests
import matplotlib.pyplot as plt
from datetime import datetime

# Example: Query metrics from monitoring system
def get_server_metrics(host, metric, start, end, timeout=10):
    """Query the monitoring API for one metric on one host.

    Args:
        host: Hostname label to filter on (e.g. 'web-01').
        metric: Metric name to query (e.g. 'cpu_usage').
        start, end: datetime objects bounding the query range
            (serialized with isoformat()).
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The decoded JSON response from the metrics API.

    Raises:
        requests.exceptions.HTTPError: on a 4xx/5xx response.
        requests.exceptions.Timeout: if the request exceeds `timeout`.
    """
    # Plain string: no placeholders, so no f-prefix needed.
    url = "https://monitoring.example.com/api/v1/query_range"
    params = {
        "query": f'{metric}{{host="{host}"}}',
        "start": start.isoformat(),
        "end": end.isoformat(),
        "step": "5m"
    }
    headers = {"Authorization": "Bearer TOKEN"}

    # A timeout keeps callers from hanging forever if the
    # monitoring system is unreachable.
    response = requests.get(url, params=params, headers=headers,
                            timeout=timeout)
    response.raise_for_status()
    return response.json()

# Plot CPU usage for multiple servers
servers = ['web-01', 'web-02', 'db-01']
plt.style.use('seaborn-v0_8')
fig, ax = plt.subplots()

for server in servers:
    # In real code: data = get_server_metrics(server, 'cpu_usage', start, end)
    # Simulated data
    times = list(range(24))
    values = [50 + (i % 10) for i in times]

    ax.plot(times, values, label=server)

ax.set_title("Server CPU Usage")
ax.set_xlabel("Hour")
ax.set_ylabel("CPU %")
ax.legend()

plt.savefig('server_cpu.png', bbox_inches='tight')

Hacker News API

import requests

# Get top stories
url = "https://hacker-news.firebaseio.com/v0/topstories.json"
response = requests.get(url)
story_ids = response.json()[:10]  # Top 10 stories

# Fetch each story
stories = []
for story_id in story_ids:
    url = f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json"
    response = requests.get(url)
    story = response.json()
    stories.append(story)

# Display
for story in stories:
    title = story.get('title', 'No title')
    url = story.get('url', 'No URL')
    score = story.get('score', 0)
    print(f"{score} points: {title}")
    print(f"  {url}\n")

Pagination

APIs return limited results per page:

import requests

all_items = []
page = 1
per_page = 100

while True:
    url = "https://api.example.com/items"
    params = {"page": page, "per_page": per_page}
    response = requests.get(url, params=params)
    items = response.json()

    if not items:  (1)
        break

    all_items.extend(items)
    page += 1

print(f"Fetched {len(all_items)} total items")
1 Empty response = no more pages

Quick Reference

Operation Code

GET request

requests.get(url)

With params

requests.get(url, params=dict)

With headers

requests.get(url, headers=dict)

POST request

requests.post(url, json=data)

Check status

response.status_code

Parse JSON

response.json()

Raise on error

response.raise_for_status()

Set timeout

requests.get(url, timeout=10)

Status Code Meaning

200

Success

201

Created

400

Bad request

401

Unauthorized

403

Forbidden

404

Not found

429

Rate limited

500

Server error

Exercises

17-1. Other Languages

Query GitHub for top JavaScript or Rust repositories. Compare with Python.

17-2. Active Discussions

Use Hacker News API to find stories with most comments.

17-3. Testing Rate Limits

Print GitHub rate limit headers. How many requests remaining?

17-4. Network Device API

Write function to query a network device API (or mock one).

17-5. Multiple APIs

Combine data from two APIs in one visualization.

Summary

  • requests.get() makes HTTP requests

  • response.json() parses JSON responses

  • Always check status_code before processing

  • Handle errors: timeout, HTTP errors, JSON errors

  • Respect rate limits: check headers, handle 429

  • Pagination: loop until empty response

  • Plotly creates interactive visualizations from API data

  • Headers provide authentication and metadata

Project 2 complete. Next: Django web applications.