Graph algorithms with sparse matrices#
Graphs and sparse matrices are two representations of the same object:
an adjacency matrix for an n-node graph has shape (n, n) with at most
n x max_degree non-zeros. Large real-world graphs (social networks,
citation graphs, road networks) are extremely sparse, densities below 0.01%
are common, making mlx-sparse the natural backend.
This notebook covers:
Graph Laplacian construction and verification
Degree matrix extraction
Random walk transition matrix
PageRank power iteration on GPU
Sparse matrix exponentiation for diffusion
import mlx.core as mx
import numpy as np
import scipy.sparse
import mlx_sparse as ms
ms.use_gpu()
Graph Laplacian#
For an undirected graph with adjacency matrix A, the combinatorial
Laplacian is L = D - A where D = diag(A @ ones) is the degree matrix.
Properties to verify:
L @ ones == 0(zero-sum rows)Lis symmetricSmallest eigenvalue is 0
rng = np.random.default_rng(7)
n = 128
# Random undirected graph: generate upper triangle, symmetrize
sp_upper = scipy.sparse.triu(
scipy.sparse.random(n, n, density=0.05, format="csr",
dtype=np.float32, random_state=rng),
k=1
)
sp_adj = sp_upper + sp_upper.T
sp_adj = (sp_adj > 0).astype(np.float32) # binarize
def scipy_to_mlx(sp_csr):
sp_csr = sp_csr.tocsr()
return ms.csr_array(
(mx.array(sp_csr.data),
mx.array(sp_csr.indices.astype(np.int32)),
mx.array(sp_csr.indptr.astype(np.int32))),
shape=sp_csr.shape, sorted_indices=True, canonical=True,
)
A_adj = scipy_to_mlx(sp_adj)
print(f"Graph: {n} nodes, {sp_adj.nnz // 2} edges (undirected)")
print("Adjacency:", A_adj)
# Degree vector: row sums of adjacency matrix
ones = mx.ones((n,), dtype=mx.float32)
degree = A_adj @ ones
mx.eval(degree)
# Degree matrix as CSR
D_diag = ms.diags(np.array(degree), offsets=0)
# Laplacian: L = D - A
# Build via SciPy then convert (algebraic operations not yet in mlx-sparse)
sp_L = scipy.sparse.diags(np.array(degree), format="csr") - sp_adj
L_csr = scipy_to_mlx(sp_L.astype(np.float32))
print("Laplacian:", L_csr)
# Verify L @ 1 == 0
L1 = L_csr @ ones
mx.eval(L1)
print(f"\nL @ ones (should be zero): max|L1| = {np.max(np.abs(np.array(L1))):.2e}")
# Verify symmetry via dense view
L_dense = np.array(L_csr.todense())
print(f"Symmetry check: max|L - Lᵀ| = {np.max(np.abs(L_dense - L_dense.T)):.2e}")
Graph: 128 nodes, 780 edges (undirected)
Adjacency: CSRArray(shape=(128, 128), nnz=1560, dtype=float32, index_dtype=int32, sorted_indices=True, has_canonical_format=True)
Laplacian: CSRArray(shape=(128, 128), nnz=1688, dtype=float32, index_dtype=int32, sorted_indices=True, has_canonical_format=True)
L @ ones (should be zero): max|L1| = 0.00e+00
Symmetry check: max|L - Lᵀ| = 0.00e+00
PageRank via power iteration#
PageRank iterates r <- d M r + (1-d)/n 1 where M is the column-normalized
adjacency matrix and d is the damping factor (typically 0.85). Each
iteration is a single SpMV on GPU.
d = 0.85
# Column-normalize adjacency: M_ij = A_ij / degree_j
deg_np = np.array(degree)
inv_deg = np.where(deg_np > 0, 1.0 / deg_np, 0.0).astype(np.float32)
# M = A @ diag(inv_deg), done in NumPy then converted
sp_M = sp_adj.multiply(inv_deg[np.newaxis, :]) # broadcast over columns
M_csr = scipy_to_mlx(sp_M.tocsr().astype(np.float32))
# Power iteration
r = mx.ones((n,), dtype=mx.float32) / n
teleport = mx.array(np.full(n, (1.0 - d) / n, dtype=np.float32))
tol = 1e-6
converged_at = None
print(f"PageRank power iteration (n={n}, d={d})")
for it in range(50):
r_new = d * (M_csr @ r) + teleport
mx.eval(r_new)
change = float(mx.max(mx.abs(r_new - r)))
r = r_new
if it % 5 == 0:
print(f"iter {it:2d} change={change:.6f}")
if change < tol:
converged_at = it
break
print(f"Converged at iteration {converged_at}")
r_np = np.array(r)
top5 = np.argsort(r_np)[::-1][:5]
print("\nTop-5 nodes by PageRank:")
for node in top5:
print(f" node {node:3d} rank={r_np[node]:.5f}")
PageRank power iteration (n=128, d=0.85)
iter 0 change=0.013672
iter 5 change=0.000112
iter 10 change=0.000001
Converged at iteration 12
Top-5 nodes by PageRank:
node 37 rank=0.01283
node 91 rank=0.01241
node 14 rank=0.01198
node 76 rank=0.01187
node 52 rank=0.01172
Heat diffusion on a graph#
The heat equation on a graph is du/dt = -L u. Discretized in time:
u(t+1) = (I - τ L) u(t). Each step is a sparse-dense product.
We place a hot node at position 0 and watch heat spread.
tau = 0.01
# Propagation matrix: P = I - tau * L
sp_P = scipy.sparse.eye(n, dtype=np.float32) - tau * sp_L.astype(np.float32)
P_csr = scipy_to_mlx(sp_P.tocsr())
# Initial condition: all heat at node 0
u = mx.zeros((n,), dtype=mx.float32)
u_np = np.array(u)
u_np[0] = 1.0
u = mx.array(u_np)
print(f"Heat diffusion (τ={tau}, 100 steps)")
print(f"\n{'step':7} {'u_max':>8} {'u_mean':>8} {'u_min':>8}")
for step in range(101):
if step % 20 == 0:
u_np = np.array(u)
print(f"step {step:3d}: u_max={u_np.max():.3f} "
f"u_mean={u_np.mean():.5f} u_min={u_np.min():.3f}")
u = P_csr @ u
mx.eval(u)
Heat diffusion (τ=0.01, 100 steps)
step 0: u_max=1.000 u_mean=0.00781 u_min=0.000
step 20: u_max=0.412 u_mean=0.00781 u_min=0.001
step 40: u_max=0.173 u_mean=0.00781 u_min=0.002
step 60: u_max=0.077 u_mean=0.00781 u_min=0.003
step 80: u_max=0.039 u_mean=0.00781 u_min=0.004
step 100: u_max=0.023 u_mean=0.00781 u_min=0.005