Histograms#

matviz provides two main histogram functions:

  • nhist — smart 1D histograms with automatic binning

  • ndhist — 2D density histograms (heat maps)

Both use Scott’s normal reference rule for bin selection and handle edge cases gracefully.

import numpy as np
import matplotlib.pyplot as plt
from matviz.histogram_utils import nhist, ndhist
from matviz.viz import subplotter, title

nhist — 1D Histograms#

Single array#

# Draw 10,000 samples from a standard normal distribution.
y = np.random.randn(10000)
# A single array yields one histogram; the return value is kept in `fig`.
fig = nhist(y)
title('Single distribution')
plt.show()
../_images/dff30ff537d77b36e79fa0b70eb74fee1a5486fd5a7467507a308aad36359f37.png

Dictionary input — compare distributions#

# Two named samples of different sizes (100,000 vs 5,000); the second is
# shifted by +2 so the two distributions are visibly offset.
A = {'mu=0': np.random.randn(100000), 'mu=2': np.random.randn(5000) + 2}
# Passing a dict plots every entry together for comparison.
# NOTE(review): the dict keys presumably become the legend labels — confirm.
fig = nhist(A)
plt.show()
../_images/1f52342166047de05e49e11b5c74062002e1adb5a0ef36e8b3a1b9f61252b5eb.png

Key parameters#

# Two groups of different sizes; 'Group B' is shifted by +1.5.
A = {'Group A': np.random.randn(10000), 'Group B': np.random.randn(5000) + 1.5}

# One figure holding six panels, each demonstrating a single nhist keyword.
plt.figure(figsize=(15, 8))

# NOTE(review): subplotter(2, 3, i) presumably activates panel i (0-based)
# of a 2x3 grid — confirm against matviz.viz.
subplotter(2, 3, 0)
# f below 1: fewer bins than the default (as the panel title states).
nhist(A, f=0.5)
title('f=0.5 (fewer bins)')

subplotter(2, 3, 1)
# f above 1: more bins than the default (as the panel title states).
nhist(A, f=4)
title('f=4 (more bins)')

subplotter(2, 3, 2)
# NOTE(review): 'viridis' is a matplotlib colormap name; presumably the
# group colors are drawn from it — confirm.
nhist(A, color='viridis')
title('color="viridis"')

subplotter(2, 3, 3)
# NOTE(review): presumably rescales the counts to percentages — confirm.
nhist(A, normalize='percent')
title('normalize="percent"')

subplotter(2, 3, 4)
# NOTE(review): presumably forces both groups onto shared bin edges — confirm.
nhist(A, same_bins_flag=True)
title('same_bins_flag=True')

subplotter(2, 3, 5)
# NOTE(review): presumably disables error bars — confirm.
nhist(A, noerror=True)
title('noerror=True')

plt.show()
../_images/19e304bfc4b8d3cd755a0b9ff563aa7ce0518bcc2b78322dd37d69be051382c7.png

Integer bins and axis limits#

# 5,000 random integers in the half-open range [0, 20), i.e. 0 through 19.
integers = np.random.randint(0, 20, 5000)
# Request integer bins for discrete data.
# NOTE(review): presumably one bin per integer value — confirm.
fig = nhist(integers, int_bins_flag=True)
title('Integer bins')
plt.show()
../_images/49678aeeeb9b86cac0eccfbc9ed5de053e55509312172112a637092b23fbb0d7.png
# Standard-normal sample; part of it falls outside the [-2, 2] window below.
data = np.random.randn(10000)
# minx/maxx set the x-range to [-2, 2]; exclude_extremes=True excludes the
# out-of-range values (as the title states).
# NOTE(review): exact treatment of excluded points is not shown here — confirm.
fig = nhist(data, minx=-2, maxx=2, exclude_extremes=True)
title('Clipped to [-2, 2], extremes excluded')
plt.show()
../_images/fd5b9b8cca7093bbca28400c1934aee7eec645487fc1ddf0f035aa2add9814c6.png

DataFrame input#

import pandas as pd

# Two-column DataFrame of 1,000 samples each; 'Sensor B' is shifted by +0.5.
df = pd.DataFrame({
    'Sensor A': np.random.randn(1000),
    'Sensor B': np.random.randn(1000) + 0.5,
})
# A DataFrame can be passed directly to nhist.
# NOTE(review): presumably each column is plotted as its own distribution,
# labelled by the column name — confirm.
fig = nhist(df)
plt.show()
../_images/b07e0a01778d756eb3062df530eb63e01a0f2fcff22792c2b7330c2d20b216f5.png

Accessing return data#

# The object returned by nhist carries the histogram data in `fig.nhist`,
# indexed by the keys 'N' and 'bins'.
fig = nhist(np.random.randn(1000))
# [0] picks the first entry and [:5] its first five values.
# NOTE(review): entry 0 presumably corresponds to the first (here, only)
# input dataset — confirm.
print('Bin counts:', fig.nhist['N'][0][:5], '...')
print('Bin edges:', fig.nhist['bins'][0][:5], '...')
plt.show()
Bin counts: [ 0  1  2  5 12] ...
Bin edges: [-3.55368336 -3.26462916 -2.97557496 -2.68652076 -2.39746656] ...
../_images/99298505a4d2813c51a67731ba90b75fac42ee06e80b739864a31a2421f56458.png

ndhist — 2D Histograms#

Basic 2D histogram#

# Correlated test data: y is x plus independent standard-normal noise.
x = np.random.randn(5000)
y = x + np.random.randn(5000)
# Two arrays in: a 2D histogram of the (x, y) pairs.
fig = ndhist(x, y)
# Add a colorbar for the histogram just drawn.
plt.colorbar()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
../_images/019077f44bc41cb7c30f776e61f3eeba1f6159c8295762439eb0ca9d30faa24b.png

Bin density with f, fx, fy#

The f parameter controls bin density relative to the default (Scott’s rule). Use fx and fy to control x and y axes independently.

# Correlated test data: y is x plus independent standard-normal noise.
x = np.random.randn(5000)
y = x + np.random.randn(5000)

# One figure with four panels comparing the bin-density keywords.
plt.figure(figsize=(10, 8))

# Panel 0: default bin density, the baseline for the other three panels.
subplotter(2, 2, 0)
ndhist(x, y)
title('default')
plt.colorbar()

# Panel 1: reduce bin density on the x axis only.
# Each title below reports the value actually passed to ndhist.
subplotter(2, 2, 1)
ndhist(x, y, fx=0.5)
title('fx=0.5 (fewer x bins)')
plt.colorbar()

# Panel 2: reduce bin density on the y axis only.
subplotter(2, 2, 2)
ndhist(x, y, fy=0.5)
title('fy=0.5 (fewer y bins)')
plt.colorbar()

# Panel 3: reduce bin density on both axes at once.
subplotter(2, 2, 3)
ndhist(x, y, f=0.5)
title('f=0.5 (fewer bins on both)')
plt.colorbar()

plt.show()
../_images/74d0cc927578a3d6eaf3c19192cb85e7e30e3930aaad699239a2fd123805dd51.png

Complex numbers#

# Complex samples in polar form: magnitude is 5 plus standard-normal noise,
# phase is pi/4 plus standard-normal noise (in radians).
z = (5 + np.random.randn(10000)) * np.exp(1j * (np.random.randn(10000) + np.pi/4))
# A single complex array is accepted; the axis labels below show real part
# on x and imaginary part on y.
# NOTE(review): the units of smooth=1 are not shown here — confirm.
fig = ndhist(z, smooth=1)
plt.colorbar()
plt.xlabel('Real')
plt.ylabel('Imaginary')
plt.show()
../_images/aa3097427658834cf707baceedce4ab72865b92423144a978410879b150c16d8.png

Time series mode#

n = 10000
# Random walk (cumulative sum of standard-normal steps) plus independent
# noise with standard deviation 15.
y = np.cumsum(np.random.randn(n)) + 15 * np.random.randn(n)
# A single real array is plotted against its sample number (see the x label).
# fx=5 raises the x-axis bin density relative to the default.
fig = ndhist(y, fx=5)
plt.xlabel('Sample number')
plt.ylabel('Value')
plt.colorbar()
plt.show()
../_images/1cb58455ccb473120f16afa09bfbacc92e272c60fe758b59e56cd15fd1b0aecb.png

Log colorbar#

# Three components: 50 identical points at (1, 1), a 15,000-point
# standard-normal cloud, and a 1,000-point cluster centred near (4, 3)
# with standard deviation 0.5.
x = np.concatenate([np.ones(50), np.random.randn(15000), 4 + np.random.randn(1000)/2])
y = np.concatenate([np.ones(50), np.random.randn(15000), 3 + np.random.randn(1000)/2])
# Use a logarithmic color scale.
# NOTE(review): the 'log10(1 + count)' label below is written by hand —
# confirm it matches the transform ndhist actually applies.
fig = ndhist(x, y, log_colorbar_flag=True)
plt.colorbar(label='log10(1 + count)')
plt.show()
../_images/c4c2db6e3a70019f52d146b66da6db3eec5532cff878de00efaf66d864e2c38a.png

Normalization and smoothing#

n = 10000
# Random walk plus independent noise with standard deviation 15.
y = np.cumsum(np.random.randn(n)) + 15 * np.random.randn(n)

# Three side-by-side panels, each adding one keyword to the same
# ndhist(y, fx=5) call.
plt.figure(figsize=(15, 4))

subplotter(1, 3, 0)
# NOTE(review): normy presumably normalizes the counts along the y
# direction — confirm the exact convention.
ndhist(y, fx=5, normy=True)
title('normy=True')
plt.colorbar()

subplotter(1, 3, 1)
# NOTE(review): normx presumably normalizes the counts along the x
# direction — confirm the exact convention.
ndhist(y, fx=5, normx=True)
title('normx=True')
plt.colorbar()

subplotter(1, 3, 2)
# NOTE(review): smooth=2 presumably smooths the 2D counts; the kernel and
# units are not shown here — confirm.
ndhist(y, fx=5, smooth=2)
title('smooth=2')
plt.colorbar()

plt.show()
../_images/6ada4c4dd9e3c3985edd06434b323f116364b5de6b08ea411115a7665da3d0cc.png

Contour levels#

# Correlated data: y is half of x plus noise with standard deviation 0.5.
x = np.random.randn(50000)
y = x * 0.5 + np.random.randn(50000) * 0.5

plt.figure(figsize=(12, 4))

subplotter(1, 2, 0)
# levels=True draws filled contours (as the panel title states).
ndhist(x, y, f=0.5, levels=True)
title('levels=True (filled contours)')
plt.colorbar()

subplotter(1, 2, 1)
# An explicit list selects specific contour levels.
# NOTE(review): the values are presumably percentiles — confirm.
# This panel is drawn without a colorbar.
ndhist(x, y, f=0.5, levels=[25, 50, 75, 90])
title('levels=[25, 50, 75, 90]')

plt.show()
../_images/3746160808da31acf1f01f97e9b8fc7042fb5203948332190abee4f132d5cf60.png

Accessing return data#

# The object returned by ndhist carries the histogram data in `fig.ndhist`,
# indexed by the keys 'counts', 'bins_x' and 'bins_y'.
fig = ndhist(np.random.randn(5000), np.random.randn(5000))
# In the sample output shown after this snippet, each bins array is one
# longer than the matching counts dimension, consistent with the arrays
# holding bin edges.
# NOTE(review): confirm counts is indexed as (x, y).
print('Counts shape:', fig.ndhist['counts'].shape)
print('bins_x length:', len(fig.ndhist['bins_x']))
print('bins_y length:', len(fig.ndhist['bins_y']))
plt.colorbar()
plt.show()
Counts shape: (56, 57)
bins_x length: 57
bins_y length: 58
../_images/64828608858c76593ead2e45649b0c20af19ae03f15d794806bf9050e6bfbfbd.png