# NumPy Cheatsheet
Array operations, indexing, broadcasting, vectorization, and performance tricks for numerical computing.
import numpy as np
# ── From Python data ──
a = np.array([1, 2, 3, 4, 5]) # 1D array
a = np.array([[1, 2, 3], [4, 5, 6]]) # 2D array
a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # 3D array
# ── Specify dtype ──
a = np.array([1, 2, 3], dtype=np.float32)
a = np.array([1, 2, 3], dtype='int8')
a = np.array([True, False, True], dtype=bool)
# ── Built-in creation functions ──
np.zeros((3, 4)) # 3×4 array of 0.0
np.zeros((3, 4), dtype=np.int32) # 3×4 array of 0
np.ones((2, 3)) # 2×3 array of 1.0
np.full((3, 3), fill_value=7) # 3×3 array of 7
np.empty((2, 3)) # uninitialized (garbage values)
np.empty_like(a) # same shape/type, uninitialized
# ── Ranges & sequences ──
np.arange(0, 10, 2) # [0, 2, 4, 6, 8]
np.arange(5) # [0, 1, 2, 3, 4]
np.linspace(0, 1, 5) # [0, 0.25, 0.5, 0.75, 1.0]
np.linspace(0, 2*np.pi, 100) # 100 points for sin/cos
np.geomspace(1, 1000, 4) # [1, 10, 100, 1000] (log scale)
np.logspace(0, 4, 5) # [1, 10, 100, 1000, 10000]
# ── Random (NumPy 1.17+ Generator API) ──
rng = np.random.default_rng(42)
rng.random((3, 3)) # uniform [0, 1)
rng.integers(0, 100, size=(3, 3))
rng.standard_normal((3, 3)) # standard normal
rng.normal(0, 1, size=(3, 3)) # N(μ=0, σ²=1)
# ── Identity & diagonal ──
np.eye(4) # 4×4 identity
np.eye(3, k=1) # identity shifted by 1
np.diag([1, 2, 3, 4]) # diagonal matrix from list
np.diag(np.arange(5)) # diagonal from array
np.tri(4) # lower triangular matrix
# ── meshgrid (for 3D plots, grid evaluation) ──
x = np.linspace(-5, 5, 100)
y = np.linspace(-5, 5, 100)
X, Y = np.meshgrid(x, y)
Z = np.sin(X) * np.cos(Y)

a = np.array([[1, 2, 3], [4, 5, 6]])
# ── Core properties ──
a.ndim # number of dimensions (axes)
a.shape # (rows, cols) tuple
a.size # total elements
a.dtype # data type
a.itemsize # bytes per element
a.nbytes # total bytes (size × itemsize)
a.T # transpose
# ── Reshaping ──
a.reshape(3, 2) # change shape (must match size)
a.reshape(-1) # flatten to 1D
a.reshape(-1, 1) # column vector
a.reshape(1, -1) # row vector
a.ravel() # flatten (returns view if possible)
a.flatten() # flatten (always returns copy)
a.resize((3, 2)) # resize in-place (may discard data)
np.squeeze(a) # remove size-1 dimensions
# ── Adding dimensions ──
np.newaxis # alias for None
a[np.newaxis, :] # add axis at position 0 (1, M, N)
a[:, np.newaxis] # add axis at position 1 (M, 1, N)
np.expand_dims(a, axis=0) # same as newaxis
np.expand_dims(a, axis=(0, 2)) # add at multiple positions
# ── Tiling & repeating ──
np.tile(a, (2, 3)) # repeat entire array 2×3
np.repeat(a, 3, axis=0) # repeat each row 3 times
np.repeat(a, 3, axis=1) # repeat each column 3 times
# ── Common patterns ──
grid = np.indices((3, 3)) # [[row indices], [col indices]]
I, J = np.indices((3, 3))
# I = [[0,0,0],[1,1,1],[2,2,2]]
# J = [[0,1,2],[0,1,2],[0,1,2]]

| Type | Code | Range / Description |
|---|---|---|
| int8 | i1 | -128 to 127 |
| int16 | i2 | -32,768 to 32,767 |
| int32 | i4 | ±2.1 billion |
| int64 | i8 | ±9.2 × 10¹⁸ |
| float16 | f2 | Half precision |
| float32 | f4 | Single precision (~7 digits) |
| float64 | f8 | Double precision (~15 digits) |
| complex64 | c8 | Two float32 |
| complex128 | c16 | Two float64 |
| bool_ | ? | True / False |
| Function | Shape | Filled With |
|---|---|---|
| zeros((m,n)) | m×n | 0.0 |
| ones((m,n)) | m×n | 1.0 |
| full((m,n), v) | m×n | v |
| empty((m,n)) | m×n | Uninitialized |
| eye(n) | n×n | Identity matrix |
| arange(a,b,s) | 1D | a, a+s, a+2s, ... |
| linspace(a,b,n) | 1D | n evenly spaced |
| diag(v) | n×n | Diagonal from v |
Tip: use rng = np.random.default_rng(seed) (NumPy 1.17+) instead of the legacy np.random module. The new Generator API is faster, more flexible, and has better statistical properties.

a = np.array([10, 20, 30, 40, 50, 60, 70, 80])
# ── 1D indexing ──
a[0] # 10 (first element)
a[-1] # 80 (last element)
a[2:5] # [30, 40, 50] (indices 2,3,4)
a[::2] # [10, 30, 50, 70] (every other)
a[::-1] # [80, 70, ..., 10] (reversed)
a[1::2] # [20, 40, 60, 80] (odd indices)
# ── 2D indexing ──
b = np.arange(12).reshape(3, 4)
# [[ 0 1 2 3]
# [ 4 5 6 7]
# [ 8 9 10 11]]
b[0, 1] # 1 (row 0, col 1)
b[1] # [4, 5, 6, 7] (entire row)
b[:, 2] # [2, 6, 10] (entire column)
b[0:2, 1:3] # [[1,2], [5,6]] (sub-matrix)
b[:, ::2] # every other column
b[::-1, :] # reversed rows
b[-1] # last row
# ── Assigning values ──
a[2:5] = [99, 98, 97]
b[:, 0] = -1
b[b > 5] = 0 # conditional assignment

# ── Fancy (integer array) indexing ──
a = np.arange(10)
a[[3, 1, 7, 5]] # [3, 1, 7, 5]
a[np.array([0, 0, 3, 8, 8])] # [0, 0, 3, 8, 8] (duplicates OK)
# 2D fancy indexing
b = np.arange(12).reshape(3, 4)
rows = [0, 2, 1]
cols = [1, 3, 0]
b[rows, cols] # [1, 11, 4] — pairs (0,1), (2,3), (1,0)
# ── Boolean indexing ──
a = np.array([5, 3, 8, 1, 9, 2, 7])
mask = a > 4 # [True, False, True, ...]
a[mask] # [5, 8, 9, 7]
a[a > 4] # one-liner
a[(a > 3) & (a < 8)] # [5, 7] (use & | ~ for boolean logic)
a[(a < 3) | (a > 8)] # [1, 2, 9]
# ── np.where ──
np.where(a > 4, a, 0) # replace values ≤4 with 0
np.where(a > 4) # returns (indices,) where condition True
np.where(a > 4, 'yes', 'no') # string result
# ── np.take / np.put ──
np.take(a, [0, 2, 4]) # same as a[[0,2,4]]
np.put(a, [1, 3, 5], [99, 88, 77]) # in-place replacement
# ── np.select ──
conditions = [a < 3, a < 7, a >= 7]
choices = ['low', 'mid', 'high']
np.select(conditions, choices, default='unknown')
# ── np.argmax / np.argmin / np.argsort ──
a.argmax() # index of max value
a.argmin() # index of min value
a.argsort() # indices that sort the array
a[a.argsort()] # sorted array
# ── np.nonzero ──
b = np.array([[0, 1, 0], [1, 0, 1]])
np.nonzero(b) # (array([0,1,1]), array([1,0,2]))
b[np.nonzero(b)] # [1, 1, 1]

| Type | Syntax | Returns |
|---|---|---|
| Basic | a[5] | Scalar or slice |
| Slice | a[2:8:2] | View (no copy) |
| Fancy | a[[1,3,5]] | Copy (new array) |
| Boolean | a[mask] | Copy (filtered) |
| np.where | np.where(cond) | Indices or values |
| Ellipsis | a[..., 0] | Selects remaining dims |
Note: slicing returns views; fancy and boolean indexing return copies. Use .copy() to get an independent array if needed.

# ── Broadcasting: NumPy stretches smaller arrays to match larger ones
# WITHOUT actually copying the data (virtual replication)
# ── Scalar + array ──
a = np.array([[1, 2, 3],
[4, 5, 6]]) # shape (2, 3)
a + 10 # scalar → (1,1) → (2,3): add 10 to all
# [[11, 12, 13], [14, 15, 16]]
# ── 1D + 2D ──
row = np.array([10, 20, 30]) # shape (3,)
a + row # row → (1,3) → (2,3): add to each row
# [[11, 22, 33], [14, 25, 36]]
# ── Column vector + 2D ──
col = np.array([[10], [20]]) # shape (2, 1)
a + col # col → (2,1) → (2,3): add to each column
# [[11, 12, 13], [24, 25, 26]]
# ── 1D + 1D (outer operation) ──
x = np.array([1, 2, 3]) # (3,)
y = np.array([10, 20, 30]) # (3,)
x[:, np.newaxis] + y # (3,1) + (3,) → (3,3)
# [[11, 21, 31], [12, 22, 32], [13, 23, 33]]
# ── Incompatible shapes (error) ──
# a.shape = (2, 3)
# b.shape = (2,)
# a + b → ValueError! Can't broadcast (2,3) with (2,)

# ── Broadcasting rules (summary) ──
# 1. Compare shapes from the trailing dimension forward
# 2. Dimensions are compatible if:
# a) They are equal, OR
# b) One of them is 1
# 3. Array with dim=1 is virtually stretched
# Shape compatibility examples:
# (3, 4) + (4,) → OK → (3, 4)
# (3, 1) + (1, 4) → OK → (3, 4)
# (3, 4, 1) + (4,) → OK → (3, 4, 4)
# (2, 3, 4) + (1, 4) → OK → (2, 3, 4)
# (2, 3) + (4,) → FAIL (3 ≠ 4 and neither is 1)
# ── Practical examples ──
# Center columns (subtract mean per column)
X = np.random.randn(100, 5)
X_centered = X - X.mean(axis=0) # (100,5) - (5,) → broadcast
# Normalize rows
row_norms = np.linalg.norm(X, axis=1, keepdims=True) # (100,1)
X_normalized = X / row_norms # (100,5) / (100,1) → broadcast
# Outer product
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
outer = a[:, np.newaxis] * b # (3,1) * (3,) → (3,3)
# Distance matrix
points = np.random.randn(10, 2)
diff = points[:, np.newaxis, :] - points[np.newaxis, :, :]
dist = np.linalg.norm(diff, axis=2) # (10, 10) pairwise distances
# ── keepdims=True preserves dimensions for broadcasting ──
X = np.arange(12).reshape(3, 4)
X.sum(axis=1) # shape (3,) — NOT broadcastable back
X.sum(axis=1, keepdims=True) # shape (3,1) — broadcastable to (3,4)

| Rule | Description |
|---|---|
| 1. Align | Compare shapes right-to-left |
| 2. Equal or 1 | Dims must match or one must be 1 |
| 3. Stretch | Dim=1 is virtually replicated |
| 4. Missing | Missing left dims are treated as 1 |
| 5. Error | If dims conflict → ValueError |
| Pattern | Code | Use Case |
|---|---|---|
| Center columns | X - X.mean(0) | Standardize features |
| Normalize rows | X / norm(X, axis=1, keepdims=True) | Row-wise unit vectors |
| Outer product | a[:, None] * b | Cross-combination |
| Add bias | X + bias[np.newaxis, :] | Add bias vector |
| Distance matrix | ||x_i - x_j|| | Pairwise distances |
| Mask broadcast | X * mask[:, None] | Apply row masks |
Tip: use keepdims=True with .sum(), .mean(), etc. to preserve shape for broadcasting. Without it, the reduced axis disappears and you'll get shape mismatches.

a = np.array([1, 2, 3, 4, 5])
# ── Arithmetic (element-wise) ──
np.add(a, 10) # or: a + 10
np.subtract(a, 3) # or: a - 3
np.multiply(a, 2) # or: a * 2
np.divide(a, 2) # or: a / 2
np.floor_divide(a, 2) # or: a // 2
np.power(a, 3) # or: a ** 3
np.mod(a, 3) # or: a % 3
np.negative(a) # or: -a
np.abs(a)
np.fmod(a, 3) # C-style mod (sign follows dividend; np.mod follows divisor)
# ── Rounding ──
np.round(3.14159, 2) # 3.14
np.floor(3.7) # 3.0
np.ceil(3.2) # 4.0
np.trunc(-3.7) # -3.0
np.rint(3.5) # 4.0 (round to nearest even)
# ── Aggregate reductions ──
a.sum() # sum of all
a.prod() # product
a.cumsum() # cumulative sum
a.cumprod() # cumulative product
a.min(), a.max()
a.argmin(), a.argmax() # indices of min/max
a.mean()
a.std() # standard deviation
a.var() # variance
np.percentile(a, [25, 50, 75]) # quartiles
np.nanmean(a) # mean ignoring NaN
np.nansum(a) # sum ignoring NaN
# ── With axis parameter ──
b = np.arange(12).reshape(3, 4)
b.sum(axis=0) # column sums (shape: 4,)
b.sum(axis=1) # row sums (shape: 3,)
b.sum(axis=0, keepdims=True) # column sums (shape: 1,4)

# ── Trigonometric ──
x = np.linspace(0, 2*np.pi, 100)
np.sin(x)
np.cos(x)
np.tan(x)
np.arcsin(a) # inverse sine
np.arccos(a) # inverse cosine
np.arctan(a) # inverse tangent
np.arctan2(y, x) # angle of (x,y) in radians
np.hypot(3, 4) # 5.0 (sqrt(9+16))
np.degrees(np.pi) # 180.0
np.radians(180) # π
# ── Exponential & logarithmic ──
np.exp(a) # e^a
np.exp2(a) # 2^a
np.expm1(a) # e^a - 1 (accurate for small a)
np.log(a) # natural log
np.log2(a) # base-2 log
np.log10(a) # base-10 log
np.log1p(a) # log(1+a) (accurate for small a)
np.logaddexp(x, y) # log(exp(x) + exp(y))
# ── Hyperbolic ──
np.sinh(x)
np.cosh(x)
np.tanh(x) # often used in ML activations
# ── Misc ──
np.sqrt(a) # square root
np.square(a) # a²
np.cbrt(a) # cube root
np.sign(a) # -1, 0, or 1
np.clip(a, 0, 10) # clip values to [0, 10]
np.maximum(a, 0) # element-wise max (like ReLU)
np.minimum(a, 0) # element-wise min
np.fmax(a, b) # max ignoring NaN
np.fmin(a, b) # min ignoring NaN
np.isfinite(a) # not inf and not NaN
np.isinf(a)
np.isnan(a)
np.isclose(a, b, rtol=1e-5) # element-wise close comparison
np.allclose(a, b) # all elements close?

| Method | Description | Example |
|---|---|---|
| .reduce() | Apply to all elements | np.add.reduce(a) = sum(a) |
| .accumulate() | Cumulative results | np.add.accumulate(a) = cumsum |
| .outer() | Outer operation | np.add.outer([1,2],[3,4]) |
| .at() | Unbuffered in-place | np.add.at(a, idx, vals) |
| .out= | Output array | np.add(a, b, out=c) |
| Function | Description |
|---|---|
| scipy.special.erf(x) | Error function |
| scipy.special.gamma(x) | Gamma function |
| scipy.special.beta(a, b) | Beta function |
| scipy.special.comb(n, k) | Binomial coefficient |
| scipy.special.perm(n, k) | Permutations |
| np.i0(x) | Modified Bessel function |
Tip: np.sqrt(arr) is 100x+ faster than [math.sqrt(x) for x in arr] because it's implemented in C with SIMD vectorization.

import numpy.linalg as la
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# ── Element-wise operations (NOT matrix ops) ──
A * B # element-wise multiply
A + B # element-wise add
A ** 2 # element-wise square
# ── Matrix multiplication ──
A @ B # matrix multiply (2×2 × 2×2 = 2×2)
np.matmul(A, B) # same as @
np.dot(A, B) # works for 1D and 2D
A.dot(B) # method form
# ── Vector dot product ──
v = np.array([1, 2, 3])
w = np.array([4, 5, 6])
np.dot(v, w) # 32
v @ w # 32
# ── Determinant ──
la.det(A) # -2.0
# ── Inverse ──
la.inv(A) # [[-2, 1], [1.5, -0.5]]
A_inv = la.pinv(A) # pseudo-inverse (works for non-square)
# ── Solve linear system Ax = b ──
b = np.array([5, 6])
x = la.solve(A, b) # x = A⁻¹b
# Verify: A @ x ≈ b
# ── Rank ──
la.matrix_rank(A) # 2
# ── Trace ──
np.trace(A) # 5 (sum of diagonal)

# ── Eigenvalues & eigenvectors ──
eigenvalues, eigenvectors = la.eig(A)
eigenvalues # array of eigenvalues λ
eigenvectors[:, i] # eigenvector for eigenvalue i
# For symmetric matrices, use eigh (faster, stable)
eigenvalues, eigenvectors = la.eigh(A)
# ── SVD (Singular Value Decomposition) ──
# A = U @ diag(S) @ Vt
U, S, Vt = la.svd(A, full_matrices=True)
U, S, Vt = la.svd(A, full_matrices=False) # compact SVD
# Truncated SVD (dimensionality reduction)
U, S, Vt = la.svd(A, full_matrices=False)
k = 2
A_reconstructed = U[:, :k] @ np.diag(S[:k]) @ Vt[:k, :]
# ── LU Decomposition ──
from scipy.linalg import lu
P, L, U = lu(A)
# ── QR Decomposition ──
Q, R = la.qr(A) # A = QR, Q orthogonal, R upper triangular
Q, R = la.qr(A, mode='reduced') # economy size
# ── Cholesky Decomposition (symmetric positive definite) ──
L = la.cholesky(A)
# A = L @ L.T
# ── Matrix norms ──
la.norm(A) # Frobenius norm
la.norm(A, ord=2) # spectral norm (largest singular value)
la.norm(A, ord=np.inf) # max row sum
la.norm(A, ord=1) # max column sum
la.norm(A, ord='nuc') # nuclear norm (sum of singular values)
la.cond(A) # condition number
# ── Matrix properties ──
la.matrix_power(A, 3) # A³ = A @ A @ A
la.matrix_rank(A) # rank
np.allclose(A @ la.inv(A), np.eye(2)) # verify inverse

| Function | Description | Returns |
|---|---|---|
| la.det(A) | Determinant | Scalar |
| la.inv(A) | Matrix inverse | Matrix |
| la.pinv(A) | Pseudo-inverse | Matrix |
| la.solve(A, b) | Solve Ax=b | Vector |
| la.eig(A) | Eigen decomposition | values, vectors |
| la.eigh(A) | Eigen (symmetric) | values, vectors |
| la.svd(A) | Singular value decomp | U, S, Vt |
| la.qr(A) | QR decomposition | Q, R |
| la.cholesky(A) | Cholesky decomp | L |
| Operator | Name | Behavior |
|---|---|---|
| A @ B | matmul | Matrix multiplication |
| A * B | multiply | Element-wise multiply |
| A ** 2 | power | Element-wise square |
| A.T | transpose | Matrix transpose |
| A.conj().T | adjoint | Conjugate transpose |
Tip: use la.eigh() instead of la.eig() for symmetric/Hermitian matrices — it's faster, more numerically stable, and returns real eigenvalues guaranteed.

# ── New Generator API (NumPy 1.17+, recommended) ──
rng = np.random.default_rng(seed=42)
# ── Uniform ──
rng.random() # single float in [0, 1)
rng.random(5) # array of 5 floats
rng.random((3, 3)) # 3×3 uniform matrix
rng.uniform(low=-1, high=1, size=(3, 3))
# ── Integers ──
rng.integers(0, 10, size=5) # [3, 7, 1, 9, 2] in [0, 10)
rng.integers(0, 10, size=(2, 3), endpoint=True) # inclusive
# ── Normal (Gaussian) ──
rng.standard_normal(5) # N(0, 1), 5 samples
rng.normal(loc=10, scale=2, size=(3, 3)) # N(10, 4)
# ── Other distributions ──
rng.exponential(scale=1.0, size=5) # exponential
rng.poisson(lam=5.0, size=5) # Poisson
rng.binomial(n=10, p=0.5, size=5) # Binomial
rng.geometric(p=0.3, size=5) # Geometric
rng.hypergeometric(15, 5, 8, size=5) # Hypergeometric
rng.lognormal(mean=0, sigma=1, size=5) # Log-normal
rng.gamma(shape=2, scale=2, size=5) # Gamma
rng.beta(a=2, b=5, size=5) # Beta
rng.chisquare(df=3, size=5) # Chi-squared
rng.f(dfnum=5, dfden=2, size=5) # F-distribution
rng.standard_t(df=10, size=5) # Student's t
# ── Shuffling & sampling ──
arr = np.arange(10)
rng.shuffle(arr) # shuffle in-place
rng.permutation(arr) # shuffled copy (original unchanged)
rng.choice(arr, size=5, replace=False) # sample without replacement
rng.choice(arr, size=5, replace=True, p=probs) # weighted sample

# ── Multi-variate distributions ──
mean = [0, 0]
cov = [[1, 0.5], [0.5, 1]]
rng.multivariate_normal(mean, cov, size=1000)
# ── Random seed management ──
rng = np.random.default_rng(42) # reproducible
rng2 = np.random.default_rng(42) # same stream
np.all(rng.random(5) == rng2.random(5)) # True
# ── Bit generators (low-level) ──
from numpy.random import PCG64, MT19937
bg = PCG64(42)
rng = np.random.Generator(bg)
# ── Save/restore state ──
state = rng.bit_generator.state
rng.bit_generator.state = state # restore
# ── Create distributions as objects ──
dist = rng.standard_normal
samples = dist(size=1000)
# Or use scipy.stats for full distribution objects
from scipy import stats
norm = stats.norm(loc=0, scale=1)
norm.pdf(0) # 0.3989... (density)
norm.cdf(0) # 0.5 (cumulative)
norm.ppf(0.95) # 1.644... (percent point / quantile)
norm.rvs(size=1000) # random samples
stats.norm.fit(data) # MLE fit (class method — frozen distributions have no .fit)

| Distribution | Function | Key Params |
|---|---|---|
| Uniform | uniform(a, b) | low, high |
| Normal | normal(μ, σ) | loc, scale |
| Binomial | binomial(n, p) | n trials, p prob |
| Poisson | poisson(λ) | lam |
| Exponential | exponential(β) | scale |
| Gamma | gamma(α, β) | shape, scale |
| Beta | beta(α, β) | a, b |
| Chi-squared | chisquare(k) | df |
| Legacy (avoid) | New (preferred) | Note |
|---|---|---|
| np.random.rand() | rng.random() | No shape tuple |
| np.random.randn() | rng.standard_normal() | No shape tuple |
| np.random.seed(42) | default_rng(42) | Global vs local |
| np.random.choice() | rng.choice() | Same interface |
| np.random.shuffle() | rng.shuffle() | Same interface |
Tip: use default_rng(seed) instead of np.random.seed(). The Generator API has better statistical properties (PCG64 vs MT19937), is faster, and avoids global state issues.

# ── Text files ──
np.savetxt('data.txt', arr, delimiter=',', fmt='%.4f')
np.savetxt('data.csv', arr, delimiter=',', header='col1,col2,col3',
comments='', fmt=['%d', '%.2f', '%.4f'])
data = np.loadtxt('data.txt', delimiter=',')
data = np.loadtxt('data.csv', delimiter=',', skiprows=1) # skip header
data = np.genfromtxt('data.csv', delimiter=',', names=True, # named columns
dtype=None, missing_values='NA', filling_values=0)
# ── Binary NumPy format (.npy / .npz) ──
np.save('array.npy', arr) # single array
arr = np.load('array.npy')
np.savez('arrays.npz', a=arr1, b=arr2, c=arr3) # multiple arrays
npz = np.load('arrays.npz')
npz['a'], npz['b'], npz['c']
np.savez_compressed('arrays.npz', a=arr1, b=arr2) # compressed
# ── Memory-mapped files (for huge arrays) ──
mmap = np.memmap('large_array.dat', dtype='float32', mode='r+',
shape=(10000, 10000))
mmap[:] = rng.random((10000, 10000)) # writes go to disk
mmap[0, :5] # random access without loading all
# ── CSV with structured array ──
dt = np.dtype([('name', 'U20'), ('age', 'i4'), ('salary', 'f8')])
data = np.genfromtxt('employees.csv', delimiter=',', dtype=dt, names=True)
data['name'] # access by field name
data[data['salary'] > 50000] # filter

# ── NumPy binary formats comparison ──
# .npy — single array, header + raw data
# .npz — collection of arrays (uncompressed zip)
# .npz — compressed (savez_compressed)
# ── Structured arrays (like C structs) ──
dt = np.dtype([
('id', 'i4'),
('name', 'U20'),
('scores', 'f8', (3,)), # sub-array of 3 floats
('active', '?'),
])
records = np.zeros(5, dtype=dt)
records[0] = (1, 'Alice', [95.5, 88.0, 92.3], True)
records['id'] # array of all IDs
records['scores'][:, 0] # first score for all
# ── Record arrays (attribute access) ──
rec = np.rec.array(records)
rec.id # attribute-style access
rec.name # same
# ── String formatting ──
np.set_printoptions(
precision=4, # float precision
suppress=True, # suppress scientific notation for small numbers
threshold=50, # max elements to show
linewidth=120, # max line width
formatter={'float': '{:.2e}'.format},
)
np.array2string(arr, precision=3, separator=', ')

| Format | Read | Write | Pros |
|---|---|---|---|
| .npy | np.load() | np.save() | Fast, preserve dtype |
| .npz | np.load() | np.savez() | Multiple arrays |
| .txt/.csv | loadtxt() | savetxt() | Human-readable |
| memmap | np.memmap() | Direct assign | Huge arrays |
| Spec | Type | Example |
|---|---|---|
| %d | Integer | 42 |
| %f | Float (6 decimals) | 3.141593 |
| %.2f | Float (2 decimals) | 3.14 |
| %.4e | Scientific | 3.1416e+00 |
| %s | String | hello |
| %U20 | Unicode string | numpy |
Tip: use .npy / .npz for intermediate data between Python sessions — they're much faster than CSV and preserve exact dtype information. Only use CSV for human-readable export or sharing with other tools.

# ── Avoid Python loops — vectorize! ──
# SLOW: ~500ms for large array
def loop_sum(a):
    """Element-wise log(x) + exp(x) via an explicit Python loop (slow demo).

    Kept as a per-element loop on purpose, to contrast with the vectorized
    version: each iteration pays Python interpreter + ufunc-dispatch overhead.
    """
    out = np.empty_like(a)
    for idx, val in enumerate(a):
        out[idx] = np.log(val) + np.exp(val)
    return out
# FAST: ~2ms (250x faster)
def vectorized_sum(a):
    """Element-wise log(x) + exp(x), computed with whole-array ufunc calls."""
    logs = np.log(a)
    exps = np.exp(a)
    return exps + logs
# ── Array operations are already vectorized ──
a = rng.random(1_000_000)
# GOOD
a * 2 + 1
np.sqrt(a**2 + 1)
# BAD (avoid)
result = np.array([x * 2 + 1 for x in a])
# ── Broadcasting vs loop ──
# SLOW
distances = np.array([
np.sqrt(np.sum((points[i] - centers)**2, axis=1)) # axis=1: one distance per center
for i in range(len(points))
])
# FAST
distances = np.sqrt(np.sum((points[:, None] - centers[None, :])**2, axis=2))
# ── Conditional operations ──
# SLOW: loop with if/else
# FAST: np.where
result = np.where(a > 0, np.log(a), 0)
# ── Use axis parameter ──
b = rng.random((1000, 1000))
b.sum(axis=0) # column sums (vectorized)
b.mean(axis=1) # row means (vectorized)

# ── Numba JIT compilation (for complex loops) ──
from numba import njit
@njit
def mandelbrot(c, max_iter=100):
    """Return the iteration at which z -> z*z + c escapes |z| > 2.

    Returns max_iter if the orbit stays bounded for all iterations.
    JIT-compiled with numba for a large speedup over plain Python.
    """
    z = 0
    n = 0
    while n < max_iter:
        if abs(z) > 2:
            return n
        z = z * z + c
        n += 1
    return max_iter
# 100x+ speedup on numerical loops
# ── Memory layout matters ──
# C-order (row-major) — default in NumPy
# F-order (column-major) — sometimes faster for column ops
a_c = np.ascontiguousarray(a) # force C-order
a_f = np.asfortranarray(a) # force Fortran-order
# ── In-place operations ──
a *= 2 # in-place multiply (saves memory)
a += 1 # in-place add
a[:] = a * 2 # also in-place
# ── Preallocate output arrays ──
out = np.empty(1000)
np.add(a, b, out=out) # avoid creating temporary arrays
# ── Strided tricks (zero-copy views) ──
from numpy.lib.stride_tricks import sliding_window_view
windows = sliding_window_view(a, window_shape=5) # shape (N-4, 5)
# ── einsum — Einstein summation (flexible, often fast) ──
np.einsum('ij,jk->ik', A, B) # matrix multiply
np.einsum('ij->ji', A) # transpose
np.einsum('ii->', A) # trace
np.einsum('ij,ij->ij', A, B) # element-wise multiply
np.einsum('i,i->', a, b) # dot product
np.einsum('ij,kj->ik', A, B) # A @ B.T (multiply then sum over j)
np.einsum('...ij,...jk->...ik', A, B) # broadcast matmul
# ── Use BLAS/LAPACK ──
np.show_config() # check if optimized BLAS is linked
# Look for: openblas, mkl, or blis

| Speed | Method | When to Use |
|---|---|---|
| ★★★★★ | Vectorized numpy | Always try first |
| ★★★★★ | np.einsum() | Complex tensor contractions |
| ★★★★☆ | numba @njit | Loops you can't vectorize |
| ★★★☆☆ | sliding_window_view | Rolling computations |
| ★★☆☆☆ | List comprehension | Simple, small arrays |
| ★☆☆☆☆ | Python for-loop | Never for large arrays |
| Expression | Operation | Equivalent |
|---|---|---|
| 'ij,jk->ik' | Matrix multiply | A @ B |
| 'ij->ji' | Transpose | A.T |
| 'ii->' | Trace | np.trace(A) |
| 'i,i->' | Dot product | np.dot(a, b) |
| 'ij->ji' | Transpose | A.T |
| 'ij,kj->ik' | Multiply + sum | A @ B.T |
| '...ij,...jk->...ik' | Batch matmul | np.matmul(A, B) |
%timeit in Jupyter to benchmark. Always check that np.show_config() reports an optimized BLAS (OpenBLAS, MKL, or BLIS) — it gives 10x+ speedup on linear algebra vs. reference BLAS.