Visualization#

The `matviz.viz` module provides a collection of plotting helpers built on matplotlib. All examples on this page assume the imports below.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matviz.viz import (
    plot_range, plot_range_idx, plot_cdf, cplot, cscatter, ctext,
    polar_grid, plot_diag, plot_zero, plot_axes, subplotter, subplotter_auto,
    nicefy, logfit, streamgraph, jitter, interp_plot, linspecer, brighten,
    bar_centered, errorb, suplabel, format_axis_date, calc_plot_ROC, plot_ROC,
    title, plot
)

plot_range / plot_range_idx#

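Shade vertical regions on a plot. `plot_range` is given explicit `[start, end]` intervals, while `plot_range_idx` is given the x array together with events such as those returned by `start_and_ends`.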

# Example: shade two fixed intervals behind a sine wave.
# The sine wave is sampled at 500 evenly spaced points on [0, 10].
t = np.linspace(0, 10, 500)
y = np.sin(t)
plot(t, y)
# Two [start, end] pairs, styled translucent red.
# NOTE(review): presumably the pairs are x-axis data coordinates — confirm against plot_range.
plot_range([[1, 3], [6, 8]], color='red', alpha=0.2)
title('plot_range')
nicefy()
plt.show()
../_images/19ecc20b98ffc5011c9e47bf99ca43bea3cac9446c5899b1fab3768214f26113.png
# Example: shade the stretches of a sine wave where it exceeds 0.5.
from matviz.etl import start_and_ends

t = np.linspace(0, 10, 200)
y = np.sin(t)
plot(t, y)
# The events are derived from a boolean mask over the samples.
# NOTE(review): presumably start_and_ends returns start/end indices for each run of True — confirm.
events = start_and_ends(y > 0.5)
# Unlike plot_range, plot_range_idx also receives the x array `t` alongside the events.
plot_range_idx(t, events, color='green', alpha=0.3)
title('plot_range_idx with start_and_ends')
nicefy()
plt.show()
../_images/8ee8cd19d1f14fdfc5051682e471515d1af7b67b35f9392e687cb9c93fbfd5c6.png

plot_cdf#

# Example: overlay the CDFs of two samples.
# Both samples hold 5000 standard-normal draws; the second is shifted by +1.
# NOTE(review): no seed is set in this snippet, so the figure varies between runs.
data1 = np.random.randn(5000)
data2 = np.random.randn(5000) + 1
# One plot_cdf call per sample.
# NOTE(review): presumably the `label` values are what plt.legend() displays — confirm.
plot_cdf(data1, label='Normal(0,1)')
plot_cdf(data2, label='Normal(1,1)')
plt.xlabel('Value')
plt.ylabel('Percentile')
plt.legend()
nicefy()
plt.show()
../_images/b97523da1f166a5c5867ae8317ac98432340c4580656ac5e1c47d8771af798d3.png

Complex number plotting: cplot, cscatter, ctext, polar_grid#

# Example: draw complex-valued data with cplot, cscatter, ctext and polar_grid.
# z holds 100 samples of exp(i*theta) for theta in [0, 2*pi]; it is scaled by 2 when plotted.
z = np.exp(1j * np.linspace(0, 2*np.pi, 100))
cplot(z * 2, 'b-', lw=2)

# Eight evenly spaced points of magnitude 2; endpoint=False keeps theta=2*pi
# from duplicating the theta=0 point.
points = 2 * np.exp(1j * np.linspace(0, 2*np.pi, 8, endpoint=False))
cscatter(points, s=100, c='red', zorder=5)

# The text position is given as a single complex number.
# NOTE(review): presumably real part -> x and imaginary part -> y — confirm.
ctext(0.1 + 0.1j, 'center', fontsize=12)

polar_grid(nrings=3, nrays=8)
title('Complex plane plotting')
nicefy()
plt.show()
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
Ignoring fixed x limits to fulfill fixed data aspect with adjustable data limits.
../_images/09df11476d9ab33ba08cf9a8f0f4aa4b3761da60bed471a0b8a0ea7778a4b2e4.png

Reference lines: plot_diag, plot_zero, plot_axes#

# Example: three panels, one per reference-line helper.
# y is x plus Gaussian noise scaled by 0.3, i.e. a noisy y = x relationship.
x = np.random.randn(100)
y = x + np.random.randn(100) * 0.3

plt.figure(figsize=(12, 4))

# The last subplotter argument runs 0, 1, 2 in this snippet (0-based panel index).
# NOTE(review): presumably the first two arguments are (rows, cols) — confirm.
subplotter(1, 3, 0)
plt.scatter(x, y, alpha=0.5)
plot_diag(lw=2)
title('plot_diag')
nicefy()

# Second panel: two periods of a sine wave, followed by plot_zero().
subplotter(1, 3, 1)
plot(np.sin(np.linspace(0, 4*np.pi, 100)))
plot_zero()
title('plot_zero')
nicefy()

# Third panel: an uncorrelated Gaussian scatter, followed by plot_axes().
subplotter(1, 3, 2)
plt.scatter(np.random.randn(50), np.random.randn(50), alpha=0.5)
plot_axes()
title('plot_axes')
nicefy()

plt.show()
../_images/45a68f1fae74f6d4f7849601eb17c89c0b0571e8b550d236745af8a9ba43fe65.png

subplotter / subplotter_auto#

# Example: six panels placed with subplotter_auto.
plt.figure(figsize=(10, 6))
# subplotter_auto is given the total panel count (6) and the 0-based panel index i.
# NOTE(review): presumably it chooses the grid layout itself — confirm.
for i in range(6):
    subplotter_auto(6, i)
    plt.plot(np.random.randn(50))
    nicefy()
    title(f'Panel {i}')

plt.show()
../_images/c85fb904067f1b13c922e612cea6e45102984bbdf90ed49d69e5f717b04c7d23.png

nicefy — before and after#

# Example: the same plot drawn twice, once without and once with nicefy().
x = np.linspace(0, 10, 100)
y = np.sin(x)

plt.figure(figsize=(12, 4))

# First panel: plain matplotlib calls; nicefy() is deliberately not called.
subplotter(1, 2, 0)
plt.plot(x, y)
plt.xlabel('x_axis_label')
plt.ylabel('y_axis_label')
title('Before nicefy')

# Second panel: identical data and labels, followed by nicefy().
subplotter(1, 2, 1)
plt.plot(x, y)
plt.xlabel('x_axis_label')
plt.ylabel('y_axis_label')
title('After nicefy')
nicefy()

plt.show()
../_images/0918346f94f502dcfe41c872d6c300c4064bbef6b68aea3a2592c3d2a49a8c6c.png

logfit#

# Example: logfit on linear, exponential and power-law data.
# x is the integers 1..49, so every value is strictly positive.
x = np.arange(1, 50)

plt.figure(figsize=(15, 4))

# Linear data 2*x + 5 with additive Gaussian noise; graph_type is left at its default.
# logfit's result is unpacked as a (slope, intercept) pair.
subplotter(1, 3, 0)
slope, intercept = logfit(x, 2*x + 5 + np.random.randn(len(x))*3)
title(f'linear: slope={slope:.2f}')
nicefy()

# Exponential data exp(0.1*x) with additive noise, fitted with graph_type='logy'.
subplotter(1, 3, 1)
slope, intercept = logfit(x, np.exp(0.1*x) + np.random.randn(len(x))*0.5, graph_type='logy')
title(f'logy: slope={slope:.3f}')
nicefy()

# Power-law data x**2.5 with multiplicative noise, fitted with graph_type='loglog'.
subplotter(1, 3, 2)
slope, intercept = logfit(x, x**2.5 * (1 + np.random.randn(len(x))*0.1), graph_type='loglog')
title(f'loglog: slope={slope:.2f}')
nicefy()

plt.show()
../_images/648ca539dd6ab98b159abad8fa875a8dc580a0663a125286b5925e39fba885d3.png

interp_plot#

# Example: compare interp_plot on sparse samples against the dense true signal.
# Simulate a signal with missing data
x = np.linspace(0, 10, 200)
y_true = np.sin(x) + 0.3 * np.sin(3 * x)

# Keep only sparse, irregularly sampled points
# (15 distinct indices drawn without replacement, sorted so x_sparse is increasing)
keep = np.sort(np.random.choice(len(x), 15, replace=False))
x_sparse = x[keep]
y_sparse = y_true[keep]

# Dense reference curve and the retained samples, then interp_plot on the samples only.
plt.plot(x, y_true, 'k--', alpha=0.3, label='true signal')
plt.plot(x_sparse, y_sparse, 'o', markersize=8, label='sampled points')
interp_plot(x_sparse, y_sparse, 'r-', lw=2, label='interp_plot')
plt.legend()
title('interp_plot — reconstruct from sparse samples')
nicefy()
plt.show()
../_images/c3f5d6ee568f3a1191452299e36e71aa957e11e55166b051fdcaca3760c7837b.png

linspecer and brighten#

# Example: a linspecer palette, and brighten applied to its first color.
# linspecer(6) is iterated and indexed below; each item is used as a bar color.
colors = linspecer(6)
plt.figure(figsize=(12, 4))

# First panel: one unit-height bar per palette color.
subplotter(1, 2, 0)
for i, c in enumerate(colors):
    plt.bar(i, 1, color=c)
title('linspecer(6)')
nicefy()

# Second panel: the first palette color passed through brighten with five fractions.
# NOTE(review): the direction of the effect as the fraction grows is not visible here — confirm.
subplotter(1, 2, 1)
base = colors[0]
fracs = [0.2, 0.4, 0.6, 0.8, 1.0]
for i, f in enumerate(fracs):
    plt.bar(i, 1, color=brighten(base, f))
title('brighten with different fracs')
nicefy()
plt.show()
../_images/0b7269dad28b4a905591f9fd9eab5288b28aa25d066d9de64feafd7d7276ea05.png

bar_centered and errorb#

# Example: bar_centered on a list of heights, errorb on labelled samples.
plt.figure(figsize=(12, 4))

subplotter(1, 2, 0)
bar_centered([3, 7, 2, 5, 9], color='steelblue')
title('bar_centered')
nicefy()

# The Series maps each label (A, B, C) to its own 50-sample array,
# offset by 3, 5 and 4 respectively.
# NOTE(review): presumably errorb summarises each entry's spread — confirm.
subplotter(1, 2, 1)
data = pd.Series({
    'A': np.random.randn(50) + 3,
    'B': np.random.randn(50) + 5,
    'C': np.random.randn(50) + 4,
})
errorb(data)
title('errorb')
nicefy()

plt.show()
../_images/e24679db403d6361fda1a55362eec67033cad1854aa79fd909f23715d1293397.png

streamgraph#

# Seed NumPy's global RNG so the np.random draws that follow are reproducible.
np.random.seed(42)

def sample_events(peaks, n_total=600):
    """Draw integer event times from an equal-share mixture of Gaussians.

    Each ``(center, spread)`` pair in *peaks* contributes
    ``n_total // len(peaks)`` normal draws, taken in the order the peaks
    are listed. The pooled draws are clipped to ``[0, 100]``, rounded,
    and returned as an integer NumPy array.
    """
    draws_per_peak = n_total // len(peaks)
    # One array of draws per peak, generated in listing order so the
    # global RNG is consumed in the same sequence.
    per_peak_draws = [
        np.random.normal(mu, sigma, draws_per_peak)
        for mu, sigma in peaks
    ]
    pooled = np.concatenate(per_peak_draws)
    bounded = np.clip(pooled, 0, 100)
    return np.round(bounded).astype(int)

# Each category peaks at different times, creating an ebb-and-flow pattern
# Every peak is a (center, spread) pair; sample_events divides the 800 requested
# events across a category's peaks by integer division.
rows = []
for name, peaks in [
    ('Alpha', [(15, 7), (70, 5)]),
    ('Beta',  [(35, 9), (85, 4)]),
    ('Gamma', [(55, 10),]),
    ('Delta', [(10, 4), (45, 6), (80, 7)]),
]:
    # One [time, category] row per sampled event.
    for t in sample_events(peaks, 800):
        rows.append([t, name])

df_stream = pd.DataFrame(rows, columns=['time', 'category'])

plt.figure(figsize=(10, 4))
# NOTE(review): the meaning and units of smooth=5 are not visible here — confirm against streamgraph.
streamgraph(df_stream, smooth=5)
title('streamgraph')
nicefy()
plt.show()

ROC curves#

# Example: two ways of drawing a ROC curve, side by side.
plt.figure(figsize=(12, 5))

# From two distributions
# (1000 standard-normal draws each; the second is shifted by +1.5)
subplotter(1, 2, 0)
y1 = np.random.randn(1000)
y2 = np.random.randn(1000) + 1.5
auc_val = calc_plot_ROC(y1, y2)

# From labels and scores
# (500 zeros then 500 ones; the scores of the second half are shifted by +2)
# NOTE(review): auc_val is overwritten here and is not used afterwards in this snippet.
subplotter(1, 2, 1)
y_true = np.array([0]*500 + [1]*500)
y_score = np.concatenate([np.random.randn(500), np.random.randn(500) + 2])
auc_val = plot_ROC(y_true, y_score, c='blue')

plt.show()
../_images/a021973eb1e1259f288213d57d38c25756e7266969860b90735c6ec0a21aea0d.png

suplabel and format_axis_date#

# Example: four panels with figure-level axis labels added by suplabel.
plt.figure(figsize=(10, 8))
for i in range(4):
    subplotter(2, 2, i)
    plot(np.random.randn(20))
    nicefy()
# suplabel is called once per axis: first argument 'x' or 'y', second the label text.
suplabel('x', 'Shared X Label')
suplabel('y', 'Shared Y Label')
# NOTE(review): presumably the left/bottom margins are set to leave room for the shared labels — confirm.
plt.subplots_adjust(left=0.12, bottom=0.1)
plt.show()
../_images/0f3e0cf3a13346845188448b9e7c452dba4b2247a56853394378c4dcde198588.png

CubeRootScale#

# Example: the same cubic curve on a linear and on a 'cuberoot' y-scale.
# NOTE(review): cbrt_scale is never referenced by name below; presumably importing it
# registers the 'cuberoot' scale with matplotlib — confirm before removing the import.
from matviz import cbrt_scale

x = np.linspace(0, 100, 200)
y = x ** 3

plt.figure(figsize=(12, 4))

# First panel: default (linear) y-axis.
subplotter(1, 2, 0)
plt.plot(x, y)
title('Linear scale')
nicefy()

# Second panel: identical data, with the y-axis switched to the scale named 'cuberoot'.
subplotter(1, 2, 1)
plt.plot(x, y)
plt.gca().set_yscale('cuberoot')
title('Cube root scale')
nicefy()

plt.show()
../_images/c69a572df8b116f306e86ee4ab8f499e38c5cdf13225d04b9531c1fd8b1573ee.png