Skip to content

Automatic Differentiation

gradient(fn, argnum=0)

Compute the gradient of a scalar-valued function w.r.t. the input at index argnum.

Parameters:

Name Type Description Default
fn NumericalFunction[I, Tensor]

A NumericalFunction with one or more inputs and a scalar output.

required
argnum int

Index of the input to differentiate w.r.t. (default 0).

0

Returns:

Type Description
NumericalFunction[I, Tensor]

A NumericalFunction computing the gradient (same shape as input[argnum]).

Source code in src/anvil/ad/gradient.py
def gradient[I: tuple[Arg, ...]](fn: NumericalFunction[I, Tensor], argnum: int = 0) -> NumericalFunction[I, Tensor]:
  """Compute the gradient of a scalar-valued function w.r.t. the input at index `argnum`.

  Args:
    fn: A NumericalFunction with one or more inputs and a scalar output.
    argnum: Index of the input to differentiate w.r.t. (default 0).

  Returns:
    A NumericalFunction computing the gradient (same shape as input[argnum]).
  """
  assert len(fn.outputs) == 1
  assert 0 <= argnum < len(fn.inputs)
  input_arg = fn.inputs[argnum]
  assert len(make_more(fn.outputs[0].shape)) == 0, "gradient requires scalar output"
  inshape = make_more(input_arg.shape)
  assert len(inshape) == 1, "gradient currently only supports 1D inputs"

  def wrapper(*args) -> Tensor:
    output = fn.fn(*args)
    x = cast(Tensor, args[argnum])
    cotangent = Tensor.full((), 1.0, dtype=output.dtype, device=output.device)
    (grad_x,) = vjp((output,), (x,), (cotangent,))
    return grad_x

  return NumericalFunction(f"{fn.name}_grad", wrapper, fn.inputs)  # ty: ignore[no-matching-overload]

jacobian(fn, argnum=0)

Compute the Jacobian of fn w.r.t. the input at index argnum.

Parameters:

Name Type Description Default
fn NumericalFunction[I, Tensor]

A NumericalFunction with one or more inputs and a single 1D output.

required
argnum int

Index of the input to differentiate w.r.t. (default 0).

0

Returns:

Type Description
NumericalFunction[I, Tensor]

A NumericalFunction computing the Jacobian matrix (output_dim, input_dim).

Source code in src/anvil/ad/jacobian.py
def jacobian[I: tuple[Arg, ...]](fn: NumericalFunction[I, Tensor], argnum: int = 0) -> NumericalFunction[I, Tensor]:
  """Compute the Jacobian of fn w.r.t. the input at index `argnum`.

  Args:
    fn: A NumericalFunction with one or more inputs and a single 1D output.
    argnum: Index of the input to differentiate w.r.t. (default 0).

  Returns:
    A NumericalFunction computing the Jacobian matrix (output_dim, input_dim).
  """
  assert len(fn.outputs) == 1
  assert 0 <= argnum < len(fn.inputs)
  input_arg = fn.inputs[argnum]
  assert len(inshape := make_more(input_arg.shape)) == 1
  outshape = make_more(fn.outputs[0].shape)
  assert len(outshape) == 1

  def wrapper(*args) -> Tensor:
    if inshape[0] == 0:
      return Tensor.zeros((outshape[0], 0), dtype=input_arg.dtype)

    output = fn.fn(*args)
    x = cast(Tensor, args[argnum])
    eye = Tensor.eye(inshape[0], dtype=x.dtype)
    return dvmap(lambda v: jvp((output,), (x,), (v,))[0], in_axes=1, out_axis=1)(eye)

  return NumericalFunction(f"{fn.name}_jac", wrapper, fn.inputs)  # ty: ignore[no-matching-overload]

spjacobian(fn, argnum=0, unroll=False, sep_uncompression=True, symmetric=False)

Compute the sparse Jacobian of fn w.r.t. the input at index argnum.

Parameters:

Name Type Description Default
fn NumericalFunction[I, Tensor]

A NumericalFunction with one or more inputs and a single 1D output.

required
argnum int

Index of the input to differentiate w.r.t. (default 0).

0
unroll bool

If True, unroll the JVP loop (useful for small problems).

False
sep_uncompression bool

If True, separate the uncompression step (often faster).

True
symmetric bool

If True, use star coloring for symmetric matrices (e.g., Hessians).

False

Returns:

Type Description
SparseNumericalFunction[I]

A SparseNumericalFunction that returns a scipy.sparse.csc_array when called from Python.

Source code in src/anvil/ad/jacobian.py
def spjacobian[I: tuple[Arg, ...]](
  fn: NumericalFunction[I, Tensor], argnum: int = 0, unroll: bool = False, sep_uncompression: bool = True, symmetric: bool = False
) -> SparseNumericalFunction[I]:
  """Compute the sparse Jacobian of fn w.r.t. the input at index `argnum`.

  Args:
    fn: A NumericalFunction with one or more inputs and a single 1D output.
    argnum: Index of the input to differentiate w.r.t. (default 0).
    unroll: If True, unroll the JVP loop (useful for small problems).
    sep_uncompression: If True, separate the uncompression step (often faster).
    symmetric: If True, use star coloring for symmetric matrices (e.g., Hessians).

  Returns:
    A SparseNumericalFunction that returns a ``scipy.sparse.csc_array`` when called from Python.
  """
  assert len(fn.outputs) == 1
  assert 0 <= argnum < len(fn.inputs)
  inputs = fn.inputs
  input_shapes = tuple(make_more(inp.shape) for inp in inputs)
  dtype = inputs[argnum].dtype
  if len(input_shapes[argnum]) == 0:
    raise ValueError("spjacobian does not support scalar inputs (empty shape). Expand the input to shape (1,) or (1,1) or etc.")
  elif prod(input_shapes[argnum]) == 0:
    nnz = 0
    indices = []
    indptr = [0]
    shape = (0, 0)

    def wrapper(*args) -> Tensor:
      return Tensor.empty((0,), dtype=dtype)
  elif unroll:
    coloring = compute_jacobian_coloring(fn, argnum, symmetric=symmetric)
    basis = coloring.jvp_basis
    uncompression_idx = coloring.uncompression_idx
    indices = coloring.indices
    indptr = coloring.indptr
    nnz = indptr[-1]
    shape = coloring.jac_shape

    def wrapper(*args) -> Tensor:
      input, output = cast(Tensor, args[argnum]), fn.fn(*args)
      compressed = Tensor.stack(*[jvp((output,), (input,), (basis[:, j],))[0] for j in range(coloring.ncolors)], dim=1).flatten()
      return Tensor.stack(*[compressed[i] for i in uncompression_idx])
  else:
    coloring = compute_jacobian_coloring(fn, argnum, symmetric=symmetric)
    basis = coloring.jvp_basis
    uncompression_idx = coloring.uncompression_idx
    indices = coloring.indices
    indptr = coloring.indptr
    nnz = indptr[-1]
    shape = coloring.jac_shape

    def wrapper(*args) -> Tensor:
      input, output = cast(Tensor, args[argnum]), fn.fn(*args)
      flattened_compressed_jacobian = dvmap(lambda v: jvp((output,), (input,), (v,))[0], in_axes=1, out_axis=1)(basis).flatten()
      if sep_uncompression:
        flattened_compressed_jacobian = flattened_compressed_jacobian.contiguous()
      return flattened_compressed_jacobian[uncompression_idx]

  return SparseNumericalFunction(f"{fn.name}_jac", wrapper, fn.inputs, shape, nnz, Tensor(indices), Tensor(indptr))

hessian(fn, argnum=0)

Compute the Hessian of a scalar-valued function w.r.t. the input at index argnum.

Parameters:

Name Type Description Default
fn NumericalFunction[I, Tensor]

A NumericalFunction with one or more inputs and a scalar output.

required
argnum int

Index of the input to differentiate w.r.t. (default 0).

0

Returns:

Type Description
NumericalFunction[I, Tensor]

A NumericalFunction computing the Hessian matrix (input_dim, input_dim).

Source code in src/anvil/ad/hessian.py
def hessian[I: tuple[Arg, ...]](fn: NumericalFunction[I, Tensor], argnum: int = 0) -> NumericalFunction[I, Tensor]:
  """Compute the Hessian of a scalar-valued function w.r.t. the input at index `argnum`.

  Args:
    fn: A NumericalFunction with one or more inputs and a scalar output.
    argnum: Index of the input to differentiate w.r.t. (default 0).

  Returns:
    A NumericalFunction computing the Hessian matrix (input_dim, input_dim).
  """
  assert len(fn.outputs) == 1
  assert 0 <= argnum < len(fn.inputs)
  assert len(make_more(fn.inputs[argnum].shape)) == 1, "hessian currently only supports 1D inputs"
  assert len(make_more(fn.outputs[0].shape)) == 0, "hessian requires scalar output"

  grad_fn = gradient(fn, argnum)
  hess_fn = jacobian(grad_fn, argnum)
  return NumericalFunction(f"{fn.name}_hess", hess_fn.fn, hess_fn.inputs)  # ty: ignore[no-matching-overload]

sphessian(fn, argnum=0, unroll=False)

Compute the sparse Hessian of a scalar-valued function w.r.t. the input at index argnum.

The returned sparse matrix is upper-triangular in CSC format (as required by PIQP). Implemented as spjacobian(gradient(fn)), using star coloring for symmetry.

Parameters:

Name Type Description Default
fn NumericalFunction[I, Tensor]

A NumericalFunction with one or more inputs and a scalar output.

required
argnum int

Index of the input to differentiate w.r.t. (default 0).

0
unroll

If True, unroll the inner JVP loop (useful for small problems).

False

Returns:

Type Description
SparseNumericalFunction[I]

A SparseNumericalFunction that returns a scipy.sparse.csc_array when called from Python.

Source code in src/anvil/ad/hessian.py
def sphessian[I: tuple[Arg, ...]](fn: NumericalFunction[I, Tensor], argnum: int = 0, unroll=False) -> SparseNumericalFunction[I]:
  """Compute the sparse Hessian of a scalar-valued function w.r.t. the input at index `argnum`.

  The returned sparse matrix is upper-triangular in CSC format (as required by PIQP).
  Implemented as ``spjacobian(gradient(fn))``, using star coloring for symmetry.

  Args:
    fn: A NumericalFunction with one or more inputs and a scalar output.
    argnum: Index of the input to differentiate w.r.t. (default 0).
    unroll: If True, unroll the inner JVP loop (useful for small problems).

  Returns:
    A SparseNumericalFunction that returns a ``scipy.sparse.csc_array`` when called from Python.
  """
  assert len(fn.outputs) == 1
  assert 0 <= argnum < len(fn.inputs)
  assert len(make_more(fn.inputs[argnum].shape)) == 1, "sphessian currently only supports 1D inputs"
  assert len(make_more(fn.outputs[0].shape)) == 0, "sphessian requires scalar output"

  grad_fn = gradient(fn, argnum)
  hess_fn = spjacobian(grad_fn, argnum, unroll, symmetric=True)
  return SparseNumericalFunction(f"{fn.name}_hess", hess_fn.fn, hess_fn.inputs, hess_fn.shape, hess_fn.nnz, hess_fn.indices, hess_fn.indptr)