# NumPy Cheatsheet
Array operations, indexing, broadcasting, vectorization, and performance tricks for numerical computing.
import numpy as np
# ── From Python data ──
a = np.array([1, 2, 3, 4, 5]) # 1D array
a = np.array([[1, 2, 3], [4, 5, 6]]) # 2D array
a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # 3D array
# ── Specify dtype ──
a = np.array([1, 2, 3], dtype=np.float32)
a = np.array([1, 2, 3], dtype='int8')
a = np.array([True, False, True], dtype=bool)
# ── Built-in creation functions ──
np.zeros((3, 4)) # 3×4 array of 0.0
np.zeros((3, 4), dtype=np.int32) # 3×4 array of 0
np.ones((2, 3)) # 2×3 array of 1.0
np.full((3, 3), fill_value=7) # 3×3 array of 7
np.empty((2, 3)) # uninitialized (garbage values)
np.empty_like(a) # same shape/type, uninitialized
# ── Ranges & sequences ──
np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
np.arange(5) # [0, 1, 2, 3, 4]
np.linspace(0, 1, 5) # [0, 0.25, 0.5, 0.75, 1.0]
np.linspace(0, 2*np.pi, 100) # 100 points for sin/cos
np.geomspace(1, 1000, 4) # [1, 10, 100, 1000] (log scale)
np.logspace(0, 4, 5) # [1, 10, 100, 1000, 10000]
# ── Random (NumPy 1.17+ Generator API) ──
rng = np.random.default_rng(42)
rng.random((3, 3)) # uniform [0, 1)
rng.integers(0, 100, size=(3, 3))
rng.standard_normal((3, 3)) # standard normal
rng.normal(0, 1, size=(3, 3)) # N(μ=0, σ²=1)
# ── Identity & diagonal ──
np.eye(4) # 4×4 identity
np.eye(3, k=1) # identity shifted by 1
np.diag([1, 2, 3, 4]) # diagonal matrix from list
np.diag(np.arange(5)) # diagonal from array
np.tri(4) # lower triangular matrix
# ── meshgrid (for 3D plots, grid evaluation) ──
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = np.sin(X) * np.cos(Y)

a = np.array([[1, 2, 3], [4, 5, 6]])
# ── Core properties ──
a.ndim # number of dimensions (axes)
a.shape # (rows, cols) tuple
a.size # total elements
a.dtype # data type
a.itemsize # bytes per element
a.nbytes # total bytes (size × itemsize)
a.T # transpose
# ── Reshaping ──
a.reshape(3, 2) # change shape (must match size)
a.reshape(-1) # flatten to 1D
a.reshape(-1, 1) # column vector
a.reshape(1, -1) # row vector
a.ravel() # flatten (returns view if possible)
a.flatten() # flatten (always returns copy)
a.resize((3, 2)) # resize in-place (may discard data)
np.squeeze(a) # remove size-1 dimensions
# ── Adding dimensions ──
np.newaxis # alias for None
a[np.newaxis, :] # add axis at position 0 (1, M, N)
a[:, np.newaxis] # add axis at position 1 (M, 1, N)
np.expand_dims(a, axis=0) # same as newaxis
np.expand_dims(a, axis=(0, 2)) # add at multiple positions
# ── Tiling & repeating ──
np.tile(a, (2, 3)) # repeat entire array 2×3
np.repeat(a, 3, axis=0) # repeat each row 3 times
np.repeat(a, 3, axis=1) # repeat each column 3 times
# ── Common patterns ──
grid = np.indices((3, 3)) # [[row indices], [col indices]]
I, J = np.indices((3, 3))
# I = [[0,0,0],[1,1,1],[2,2,2]]
# J = [[0,1,2],[0,1,2],[0,1,2]]

| Type | Code | Range / Description |
|---|---|---|
| int8 | i1 | -128 to 127 |
| int16 | i2 | -32,768 to 32,767 |
| int32 | i4 | ±2.1 billion |
| int64 | i8 | ±9.2 × 10¹⁸ |
| float16 | f2 | Half precision |
| float32 | f4 | Single precision (~7 digits) |
| float64 | f8 | Double precision (~15 digits) |
| complex64 | c8 | Two float32 |
| complex128 | c16 | Two float64 |
| bool_ | ? | True / False |
| Function | Shape | Filled With |
|---|---|---|
| zeros((m,n)) | m×n | 0.0 |
| ones((m,n)) | m×n | 1.0 |
| full((m,n), v) | m×n | v |
| empty((m,n)) | m×n | Uninitialized |
| eye(n) | n×n | Identity matrix |
| arange(a,b,s) | 1D | a, a+s, a+2s, ... |
| linspace(a,b,n) | 1D | n evenly spaced |
| diag(v) | n×n | Diagonal from v |
Tip: use rng = np.random.default_rng(seed) (NumPy 1.17+) instead of the legacy np.random module. The new Generator API is faster, more flexible, and has better statistical properties.

a = np.array([10, 20, 30, 40, 50, 60, 70, 80])
# ── 1D indexing ──
a[0] # 10 (first element)
a[-1] # 80 (last element)
a[2:5] # [30, 40, 50] (indices 2,3,4)
a[::2] # [10, 30, 50, 70] (every other)
a[::-1] # [80, 70, ..., 10] (reversed)
a[1::2] # [20, 40, 60, 80] (odd indices)
# ── 2D indexing ──
b = np.arange(12).reshape(3, 4)
# [[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]]
b[0, 1] # 1 (row 0, col 1)
b[1] # [4, 5, 6, 7] (entire row)
b[:, 2] # [2, 6, 10] (entire column)
b[0:2, 1:3] # [[1,2], [5,6]] (sub-matrix)
b[:, ::2] # every other column
b[::-1, :] # reversed rows
b[-1] # last row
# ── Assigning values ──
a[2:5] = [99, 98, 97]
b[:, 0] = -1
b[b > 5] = 0 # conditional assignment

# ── Fancy (integer array) indexing ──
a = np.arange(10)
a[[3, 1, 7, 5]] # [3, 1, 7, 5]
a[np.array([0, 0, 3, 8, 8])] # [0, 0, 3, 8, 8] (duplicates OK)
# 2D fancy indexing
b = np.arange(12).reshape(3, 4)
rows = [0, 2, 1]
cols = [1, 3, 0]
b[rows, cols] # [1, 11, 4] — pairs (0,1), (2,3), (1,0)
# ── Boolean indexing ──
a = np.array([5, 3, 8, 1, 9, 2, 7])
mask = a > 4 # [True, False, True, ...]
a[mask] # [5, 8, 9, 7]
a[a > 4] # one-liner
a[(a > 3) & (a < 8)] # [5, 7] (use & | ~ for boolean logic)
a[(a < 3) | (a > 8)] # [1, 2, 9]
# ── np.where ──
np.where(a > 4, a, 0) # replace values ≤4 with 0
np.where(a > 4) # returns (indices,) where condition True
np.where(a > 4, 'yes', 'no') # string result
# ── np.take / np.put ──
np.take(a, [0, 2, 4]) # same as a[[0,2,4]]
np.put(a, [1, 3, 5], [99, 88, 77]) # in-place replacement
# ── np.select ──
conditions = [a < 3, a < 7, a >= 7]
choices = ['low', 'mid', 'high']
np.select(conditions, choices, default='unknown')
# ── np.argmax / np.argmin / np.argsort ──
a.argmax() # index of max value
a.argmin() # index of min value
a.argsort() # indices that sort the array
a[a.argsort()] # sorted array
# ── np.nonzero ──
b = np.array([[0, 1, 0], [1, 0, 1]])
np.nonzero(b) # (array([0,1,1]), array([1,0,2]))
b[np.nonzero(b)] # [1, 1, 1]

| Type | Syntax | Returns |
|---|---|---|
| Basic | a[5] | Scalar or slice |
| Slice | a[2:8:2] | View (no copy) |
| Fancy | a[[1,3,5]] | Copy (new array) |
| Boolean | a[mask] | Copy (filtered) |
| np.where | np.where(cond) | Indices or values |
| Ellipsis | a[..., 0] | Selects remaining dims |
Note: slicing returns views; fancy and boolean indexing return copies. Use .copy() to get an independent array if needed.

# ── Broadcasting: NumPy stretches smaller arrays to match larger ones
# WITHOUT actually copying the data (virtual replication)
# ── Scalar + array ──
a = np.array([[1, 2, 3],
[4, 5, 6]]) # shape (2, 3)
a + 10 # scalar → (1,1) → (2,3): add 10 to all
# [[11, 12, 13], [14, 15, 16]]
# ── 1D + 2D ──
row = np.array([10, 20, 30]) # shape (3,)
a + row # row → (1,3) → (2,3): add to each row
# [[11, 22, 33], [14, 25, 36]]
# ── Column vector + 2D ──
col = np.array([[10], [20]]) # shape (2, 1)
a + col # col → (2,1) → (2,3): add to each column
# [[11, 12, 13], [24, 25, 26]]
# ── 1D + 1D (outer operation) ──
x = np.array([1, 2, 3]) # (3,)
y = np.array([10, 20, 30]) # (3,)
x[:, np.newaxis] + y # (3,1) + (3,) → (3,3)
# [[11, 21, 31], [12, 22, 32], [13, 23, 33]]
# ── Incompatible shapes (error) ──
# a.shape = (2, 3)
# b.shape = (2,)
# a + b → ValueError! Can't broadcast (2,3) with (2,)

# ── Broadcasting rules (summary) ──
# 1. Compare shapes from the trailing dimension forward
# 2. Dimensions are compatible if:
# a) They are equal, OR
# b) One of them is 1
# 3. Array with dim=1 is virtually stretched
# Shape compatibility examples:
# (3, 4) + (4,) → OK → (3, 4)
# (3, 1) + (1, 4) → OK → (3, 4)
# (3, 4, 1) + (4,) → OK → (3, 4, 4)
# (2, 3, 4) + (1, 4) → OK → (2, 3, 4)
# (2, 3) + (4,) → FAIL (3 ≠ 4 and neither is 1)
# ── Practical examples ──
# Center columns (subtract mean per column)
X = np.random.randn(100, 5)
X_centered = X - X.mean(axis=0) # (100,5) - (5,) → broadcast
# Normalize rows
row_norms = np.linalg.norm(X, axis=1, keepdims=True) # (100,1)
X_normalized = X / row_norms # (100,5) / (100,1) → broadcast
# Outer product
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
outer = a[:, np.newaxis] * b # (3,1) * (3,) → (3,3)
# Distance matrix
points = np.random.randn(10, 2)
diff = points[:, np.newaxis, :] - points[np.newaxis, :, :]
dist = np.linalg.norm(diff, axis=2) # (10, 10) pairwise distances
# ── keepdims=True preserves dimensions for broadcasting ──
X = np.arange(12).reshape(3, 4)
X.sum(axis=1) # shape (3,) — NOT broadcastable back
X.sum(axis=1, keepdims=True) # shape (3,1) — broadcastable to (3,4)

| Rule | Description |
|---|---|
| 1. Align | Compare shapes right-to-left |
| 2. Equal or 1 | Dims must match or one must be 1 |
| 3. Stretch | Dim=1 is virtually replicated |
| 4. Missing | Missing left dims are treated as 1 |
| 5. Error | If dims conflict → ValueError |
| Pattern | Code | Use Case |
|---|---|---|
| Center columns | X - X.mean(0) | Standardize features |
| Normalize rows | X / norm(X, axis=1, keepdims=True) | Row-wise unit vectors |
| Outer product | a[:, None] * b | Cross-combination |
| Add bias | X + bias[np.newaxis, :] | Add bias vector |
| Distance matrix | ||x_i - x_j|| | Pairwise distances |
| Mask broadcast | X * mask[:, None] | Apply row masks |
Tip: use keepdims=True with .sum(), .mean(), etc. to preserve shape for broadcasting. Without it, the reduced axis disappears and you'll get shape mismatches.

a = np.array([1, 2, 3, 4, 5])
# ── Arithmetic (element-wise) ──
np.add(a, 10) # or: a + 10
np.subtract(a, 3) # or: a - 3
np.multiply(a, 2) # or: a * 2
np.divide(a, 2) # or: a / 2
np.floor_divide(a, 2) # or: a // 2
np.power(a, 3) # or: a ** 3
np.mod(a, 3) # or: a % 3
np.negative(a) # or: -a
np.abs(a)
np.fmod(a, 3) # C-style mod (sign follows dividend; np.mod follows divisor)
# ── Rounding ──
np.round(3.14159, 2) # 3.14
np.floor(3.7) # 3.0
np.ceil(3.2) # 4.0
np.trunc(-3.7) # -3.0
np.rint(3.5) # 4.0 (round to nearest even)
# ── Aggregate reductions ──
a.sum() # sum of all
a.prod() # product
a.cumsum() # cumulative sum
a.cumprod() # cumulative product
a.min(), a.max()
a.argmin(), a.argmax() # indices of min/max
a.mean()
a.std() # standard deviation
a.var() # variance
np.percentile(a, [25, 50, 75]) # quartiles
np.nanmean(a) # mean ignoring NaN
np.nansum(a) # sum ignoring NaN
# ── With axis parameter ──
b = np.arange(12).reshape(3, 4)
b.sum(axis=0) # column sums (shape: 4,)
b.sum(axis=1) # row sums (shape: 3,)
b.sum(axis=0, keepdims=True) # column sums (shape: 1,4)

# ── Trigonometric ──
x = np.linspace(0, 2*np.pi, 100)
np.sin(x)
np.cos(x)
np.tan(x)
np.arcsin(a) # inverse sine
np.arccos(a) # inverse cosine
np.arctan(a) # inverse tangent
np.arctan2(y, x) # angle of (x,y) in radians
np.hypot(3, 4) # 5.0 (sqrt(9+16))
np.degrees(np.pi) # 180.0
np.radians(180) # π
# ── Exponential & logarithmic ──
np.exp(a) # e^a
np.exp2(a) # 2^a
np.expm1(a) # e^a - 1 (accurate for small a)
np.log(a) # natural log
np.log2(a) # base-2 log
np.log10(a) # base-10 log
np.log1p(a) # log(1+a) (accurate for small a)
np.logaddexp(x, y) # log(exp(x) + exp(y))
# ── Hyperbolic ──
np.sinh(x)
np.cosh(x)
np.tanh(x) # often used in ML activations
# ── Misc ──
np.sqrt(a) # square root
np.square(a) # a²
np.cbrt(a) # cube root
np.sign(a) # -1, 0, or 1
np.clip(a, 0, 10) # clip values to [0, 10]
np.maximum(a, 0) # element-wise max (like ReLU)
np.minimum(a, 0) # element-wise min
np.fmax(a, b) # max ignoring NaN
np.fmin(a, b) # min ignoring NaN
np.isfinite(a) # not inf and not NaN
np.isinf(a)
np.isnan(a)
np.isclose(a, b, rtol=1e-5) # element-wise close comparison
np.allclose(a, b) # all elements close?

| Method | Description | Example |
|---|---|---|
| .reduce() | Apply to all elements | np.add.reduce(a) = sum(a) |
| .accumulate() | Cumulative results | np.add.accumulate(a) = cumsum |
| .outer() | Outer operation | np.add.outer([1,2],[3,4]) |
| .at() | Unbuffered in-place | np.add.at(a, idx, vals) |
| .out= | Output array | np.add(a, b, out=c) |
| Function | Description |
|---|---|
| scipy.special.erf(x) | Error function |
| scipy.special.gamma(x) | Gamma function |
| scipy.special.beta(a, b) | Beta function |
| scipy.special.comb(n, k) | Binomial coefficient |
| scipy.special.perm(n, k) | Permutations |
| np.i0(x) | Modified Bessel function |
Tip: np.sqrt(arr) is 100x+ faster than [math.sqrt(x) for x in arr] because it's implemented in C with SIMD vectorization.

import numpy.linalg as la
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# ── Element-wise operations (NOT matrix ops) ──
A * B # element-wise multiply
A + B # element-wise add
A ** 2 # element-wise square
# ── Matrix multiplication ──
A @ B # matrix multiply (2×2 × 2×2 = 2×2)
np.matmul(A, B) # same as @
np.dot(A, B) # works for 1D and 2D
A.dot(B) # method form
# ── Vector dot product ──
v = np.array([1, 2, 3])
w = np.array([4, 5, 6])
np.dot(v, w) # 32
v @ w # 32
# ── Determinant ──
la.det(A) # -2.0
# ── Inverse ──
la.inv(A) # [[-2, 1], [1.5, -0.5]]
A_inv = la.pinv(A) # pseudo-inverse (works for non-square)
# ── Solve linear system Ax = b ──
b = np.array([5, 6])
x = la.solve(A, b) # x = A⁻¹b
# Verify: A @ x ≈ b
# ── Rank ──
la.matrix_rank(A) # 2
# ── Trace ──
np.trace(A) # 5 (sum of diagonal)

# ── Eigenvalues & eigenvectors ──
eigenvalues, eigenvectors = la.eig(A)
eigenvalues # array of eigenvalues λ
eigenvectors[:, i] # eigenvector for eigenvalue i
# For symmetric matrices, use eigh (faster, stable)
eigenvalues, eigenvectors = la.eigh(A)
# ── SVD (Singular Value Decomposition) ──
# A = U @ diag(S) @ Vt
U, S, Vt = la.svd(A, full_matrices=True)
U, S, Vt = la.svd(A, full_matrices=False) # compact SVD
# Truncated SVD (dimensionality reduction)
U, S, Vt = la.svd(A, full_matrices=False)
k = 2
A_reconstructed = U[:, :k] @ np.diag(S[:k]) @ Vt[:k, :]
# ── LU Decomposition ──
from scipy.linalg import lu
P, L, U = lu(A)
# ── QR Decomposition ──
Q, R = la.qr(A) # A = QR, Q orthogonal, R upper triangular
Q, R = la.qr(A, mode='reduced') # economy size
# ── Cholesky Decomposition (symmetric positive definite) ──
L = la.cholesky(A)
# A = L @ L.T
# ── Matrix norms ──
la.norm(A) # Frobenius norm
la.norm(A, ord=2) # spectral norm (largest singular value)
la.norm(A, ord=np.inf) # max row sum
la.norm(A, ord=1) # max column sum
la.norm(A, ord='nuc') # nuclear norm (sum of singular values)
la.cond(A) # condition number
# ── Matrix properties ──
la.matrix_power(A, 3) # A³ = A @ A @ A
la.matrix_rank(A) # rank
np.allclose(A @ la.inv(A), np.eye(2)) # verify inverse

| Function | Description | Returns |
|---|---|---|
| la.det(A) | Determinant | Scalar |
| la.inv(A) | Matrix inverse | Matrix |
| la.pinv(A) | Pseudo-inverse | Matrix |
| la.solve(A, b) | Solve Ax=b | Vector |
| la.eig(A) | Eigen decomposition | values, vectors |
| la.eigh(A) | Eigen (symmetric) | values, vectors |
| la.svd(A) | Singular value decomp | U, S, Vt |
| la.qr(A) | QR decomposition | Q, R |
| la.cholesky(A) | Cholesky decomp | L |
| Operator | Name | Behavior |
|---|---|---|
| A @ B | matmul | Matrix multiplication |
| A * B | multiply | Element-wise multiply |
| A ** 2 | power | Element-wise square |
| A.T | transpose | Matrix transpose |
| A.conj().T | adjoint | Conjugate transpose |
Tip: use la.eigh() instead of la.eig() for symmetric/Hermitian matrices — it's faster, more numerically stable, and returns real eigenvalues guaranteed.

# ── New Generator API (NumPy 1.17+, recommended) ──
rng = np.random.default_rng(seed=42)
# ── Uniform ──
rng.random() # single float in [0, 1)
rng.random(5) # array of 5 floats
rng.random((3, 3)) # 3×3 uniform matrix
rng.uniform(low=-1, high=1, size=(3, 3))
# ── Integers ──
rng.integers(0, 10, size=5) # [3, 7, 1, 9, 2] in [0, 10)
rng.integers(0, 10, size=(2, 3), endpoint=True) # inclusive
# ── Normal (Gaussian) ──
rng.standard_normal(5) # N(0, 1), 5 samples
rng.normal(loc=10, scale=2, size=(3, 3)) # N(10, 4)
# ── Other distributions ──
rng.exponential(scale=1.0, size=5) # exponential
rng.poisson(lam=5.0, size=5) # Poisson
rng.binomial(n=10, p=0.5, size=5) # Binomial
rng.geometric(p=0.3, size=5) # Geometric
rng.hypergeometric(15, 5, 8, size=5) # Hypergeometric
rng.lognormal(mean=0, sigma=1, size=5) # Log-normal
rng.gamma(shape=2, scale=2, size=5) # Gamma
rng.beta(a=2, b=5, size=5) # Beta
rng.chisquare(df=3, size=5) # Chi-squared
rng.f(dfnum=5, dfden=2, size=5) # F-distribution
rng.standard_t(df=10, size=5) # Student's t
# ── Shuffling & sampling ──
arr = np.arange(10)
rng.shuffle(arr) # shuffle in-place
rng.permutation(arr) # shuffled copy (original unchanged)
rng.choice(arr, size=5, replace=False) # sample without replacement
rng.choice(arr, size=5, replace=True, p=probs) # weighted sample

# ── Multi-variate distributions ──
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]]
rng.multivariate_normal(mean, cov, size=1000)
# ── Random seed management ──
rng = np.random.default_rng(42) # reproducible
rng2 = np.random.default_rng(42) # same stream
np.all(rng.random(5) == rng2.random(5)) # True
# ── Bit generators (low-level) ──
from numpy.random import PCG64, MT19937
bg = PCG64(42)
rng = np.random.Generator(bg)
# ── Save/restore state ──
state = rng.bit_generator.state
rng.bit_generator.state = state # restore
# ── Create distributions as objects ──
dist = rng.standard_normal
samples = dist(size=1000)
# Or use scipy.stats for full distribution objects
from scipy import stats
norm = stats.norm(loc=0, scale=1)
norm.pdf(0) # 0.3989... (density)
norm.cdf(0) # 0.5 (cumulative)
norm.ppf(0.95) # 1.644... (percent point / quantile)
norm.rvs(size=1000) # random samples
stats.norm.fit(data) # MLE fit (class method — frozen distributions have no .fit)

| Distribution | Function | Key Params |
|---|---|---|
| Uniform | uniform(a, b) | low, high |
| Normal | normal(μ, σ) | loc, scale |
| Binomial | binomial(n, p) | n trials, p prob |
| Poisson | poisson(λ) | lam |
| Exponential | exponential(β) | scale |
| Gamma | gamma(α, β) | shape, scale |
| Beta | beta(α, β) | a, b |
| Chi-squared | chisquare(k) | df |
| Legacy (avoid) | New (preferred) | Note |
|---|---|---|
| np.random.rand() | rng.random() | No shape tuple |
| np.random.randn() | rng.standard_normal() | No shape tuple |
| np.random.seed(42) | default_rng(42) | Global vs local |
| np.random.choice() | rng.choice() | Same interface |
| np.random.shuffle() | rng.shuffle() | Same interface |
Tip: use default_rng(seed) instead of np.random.seed(). The Generator API has better statistical properties (PCG64 vs MT19937), is faster, and avoids global state issues.

# ── Text files ──
np.savetxt('data.txt', arr, delimiter=',', fmt='%.4f')
np.savetxt('data.csv', arr, delimiter=',', header='col1,col2,col3',
comments='', fmt=['%d', '%.2f', '%.4f'])
data = np.loadtxt('data.txt', delimiter=',')
data = np.loadtxt('data.csv', delimiter=',', skiprows=1) # skip header
data = np.genfromtxt('data.csv', delimiter=',', names=True, # named columns
dtype=None, missing_values='NA', filling_values=0)
# ── Binary NumPy format (.npy / .npz) ──
np.save('array.npy', arr) # single array
arr = np.load('array.npy')
np.savez('arrays.npz', a=arr1, b=arr2, c=arr3) # multiple arrays
npz = np.load('arrays.npz')
npz['a'], npz['b'], npz['c']
np.savez_compressed('arrays.npz', a=arr1, b=arr2) # compressed
# ── Memory-mapped files (for huge arrays) ──
mmap = np.memmap('large_array.dat', dtype='float32', mode='r+',
shape=(10000, 10000))
mmap[:] = rng.random((10000, 10000)) # writes go to disk
mmap[0, :5] # random access without loading all
# ── CSV with structured array ──
dt = np.dtype([('name', 'U20'), ('age', 'i4'), ('salary', 'f8')])
data = np.genfromtxt('employees.csv', delimiter=',', dtype=dt, names=True)
data['name'] # access by field name
data[data['salary'] > 50000] # filter

# ── NumPy binary formats comparison ──
# .npy — single array, header + raw data
# .npz — collection of arrays (uncompressed zip)
# .npz — compressed (savez_compressed)
# ── Structured arrays (like C structs) ──
dt = np.dtype([
('id', 'i4'),
('name', 'U20'),
('scores', 'f8', (3,)), # sub-array of 3 floats
('active', '?'),
])
records = np.zeros(5, dtype=dt)
records[0] = (1, 'Alice', [95.5, 88.0, 92.3], True)
records['id'] # array of all IDs
records['scores'][:, 0] # first score for all
# ── Record arrays (attribute access) ──
rec = np.rec.array(records)
rec.id # attribute-style access
rec.name # same
# ── String formatting ──
np.set_printoptions(
precision=4, # float precision
suppress=True, # suppress scientific notation for small numbers
threshold=50, # max elements to show
linewidth=120, # max line width
formatter={'float': '{:.2e}'.format},
)
np.array2string(arr, precision=3, separator=', ')

| Format | Read | Write | Pros |
|---|---|---|---|
| .npy | np.load() | np.save() | Fast, preserve dtype |
| .npz | np.load() | np.savez() | Multiple arrays |
| .txt/.csv | loadtxt() | savetxt() | Human-readable |
| memmap | np.memmap() | Direct assign | Huge arrays |
| Spec | Type | Example |
|---|---|---|
| %d | Integer | 42 |
| %f | Float (6 decimals) | 3.141593 |
| %.2f | Float (2 decimals) | 3.14 |
| %.4e | Scientific | 3.1416e+00 |
| %s | String | hello |
| %U20 | Unicode string | numpy |
Tip: use .npy / .npz for intermediate data between Python sessions — they're much faster than CSV and preserve exact dtype information. Only use CSV for human-readable export or sharing with other tools.

# ── Avoid Python loops — vectorize! ──
# SLOW: ~500ms for large array
def loop_sum(a):
    """Element-wise log(x) + exp(x) via an explicit Python loop (slow demo).

    Kept as a per-element loop on purpose, to contrast with the vectorized
    version: each iteration pays Python interpreter + ufunc-dispatch overhead.
    """
    out = np.empty_like(a)
    for idx, val in enumerate(a):
        out[idx] = np.log(val) + np.exp(val)
    return out
# FAST: ~2ms (250x faster)
def vectorized_sum(a):
    """Element-wise log(x) + exp(x), computed with whole-array ufunc calls."""
    logs = np.log(a)
    exps = np.exp(a)
    return exps + logs
# ── Array operations are already vectorized ──
a = rng.random(1_000_000)
# GOOD
a * 2 + 1
np.sqrt(a**2 + 1)
# BAD (avoid)
result = np.array([x * 2 + 1 for x in a])
# ── Broadcasting vs loop ──
# SLOW
distances = np.array([
np.sqrt(np.sum((points[i] - centers)**2, axis=1)) # axis=1: one distance per center
for i in range(len(points))
])
# FAST
distances = np.sqrt(np.sum((points[:, None] - centers[None, :])**2, axis=2))
# ── Conditional operations ──
# SLOW: loop with if/else
# FAST: np.where
result = np.where(a > 0, np.log(a), 0)
# ── Use axis parameter ──
b = rng.random((1000, 1000))
b.sum(axis=0) # column sums (vectorized)
b.mean(axis=1) # row means (vectorized)

# ── Numba JIT compilation (for complex loops) ──
from numba import njit
@njit
def mandelbrot(c, max_iter=100):
    """Return the iteration at which z -> z*z + c escapes |z| > 2.

    Returns max_iter if the orbit stays bounded for all iterations.
    JIT-compiled with numba for a large speedup over plain Python.
    """
    z = 0
    n = 0
    while n < max_iter:
        if abs(z) > 2:
            return n
        z = z * z + c
        n += 1
    return max_iter
# 100x+ speedup on numerical loops
# ── Memory layout matters ──
# C-order (row-major) — default in NumPy
# F-order (column-major) — sometimes faster for column ops
a_c = np.ascontiguousarray(a) # force C-order
a_f = np.asfortranarray(a) # force Fortran-order
# ── In-place operations ──
a *= 2 # in-place multiply (saves memory)
a += 1 # in-place add
a[:] = a * 2 # also in-place
# ── Preallocate output arrays ──
out = np.empty(1000)
np.add(a, b, out=out) # avoid creating temporary arrays
# ── Strided tricks (zero-copy views) ──
from numpy.lib.stride_tricks import sliding_window_view
windows = sliding_window_view(a, window_shape=5) # shape (N-4, 5)
# ── einsum — Einstein summation (flexible, often fast) ──
np.einsum('ij,jk->ik', A, B) # matrix multiply
np.einsum('ij->ji', A) # transpose
np.einsum('ii->', A) # trace
np.einsum('ij,ij->ij', A, B) # element-wise multiply
np.einsum('i,i->', a, b) # dot product
np.einsum('ij,kj->ik', A, B) # A @ B.T (multiply then sum over j)
np.einsum('...ij,...jk->...ik', A, B) # broadcast matmul
# ── Use BLAS/LAPACK ──
np.show_config() # check if optimized BLAS is linked
# Look for: openblas, mkl, or blis

| Speed | Method | When to Use |
|---|---|---|
| ★★★★★ | Vectorized numpy | Always try first |
| ★★★★★ | np.einsum() | Complex tensor contractions |
| ★★★★☆ | numba @njit | Loops you can't vectorize |
| ★★★☆☆ | sliding_window_view | Rolling computations |
| ★★☆☆☆ | List comprehension | Simple, small arrays |
| ★☆☆☆☆ | Python for-loop | Never for large arrays |
| Expression | Operation | Equivalent |
|---|---|---|
| 'ij,jk->ik' | Matrix multiply | A @ B |
| 'ij->ji' | Transpose | A.T |
| 'ii->' | Trace | np.trace(A) |
| 'i,i->' | Dot product | np.dot(a, b) |
| 'ij->ji' | Transpose | A.T |
| 'ij,kj->ik' | Multiply + sum | A @ B.T |
| '...ij,...jk->...ik' | Batch matmul | np.matmul(A, B) |
%timeit in Jupyter to benchmark. Always check that np.show_config() reports an optimized BLAS (OpenBLAS, MKL, or BLIS) — it gives 10x+ speedup on linear algebra vs. reference BLAS.