Source code for pysolvegn.study_optimization

"""
pysolve-gn - Robust Gauss-Newton Least Squares Solver.
Copyright (C) 2026 Artezaru, artezaru.github@proton.me

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

from typing import Union, Sequence, Callable, Optional
from numpy.typing import ArrayLike
from numbers import Real

import numpy
import scipy

from .rho_functions import (
    linear_rho_at_R2,
    soft_l1_rho_at_R2,
    cauchy_rho_at_R2,
    arctan_rho_at_R2,
    _build_tilde_R_and_tilde_J,
)



[docs]
def study_jacobian(
    residual_func: Union[Callable, Sequence[Callable]],
    jacobian_func: Union[Callable, Sequence[Callable]],
    parameters: ArrayLike,
    *,
    weight: Optional[Union[str, Sequence[str]]] = None,
    loss: Optional[Union[str, Sequence[str]]] = None,
    title: str = "",
) -> None:
    r"""
    Study the Jacobian matrix of the least squares problem to analyze the observability
    of the parameters and the convergence properties of the optimization
    (called by verbosity level 3 of the Gauss-Newton solver).

    The function accepts multiple terms in the least squares problem,
    each with its own residual function, Jacobian function, weight, and loss function
    studying the following optimization problem

    .. math::

        \min_{\mathbf{p}} \frac{1}{2} \sum_{i} w_i \sum_j \rho_i\left(\| \mathbf{R}_{i,j}(\mathbf{p}) \|^2\right)

    Then the function displays the following information about the Jacobian matrix of the least squares problem:

    - Density, Cost contribution and loss function for each term in the least squares problem.
    - Singular values of the combined Jacobian matrix :math:`\sum_i w_i \mathbf{J}_i^T \mathbf{J}_i`
      and their contribution to the variance of the parameters.
    - Condition number of the combined Jacobian matrix, which is an indicator
      of the convergence properties of the optimization.
    - Singular vectors of the combined Jacobian matrix, which can be used to analyze the
      observability of the parameters and the sensitivity of the optimization to noise in the data.
    - Parameter sensitivity analysis based on the covariance matrix of the parameters,
      which is computed as :

    .. math::

        \Sigma = \sigma^2 \left( \sum_i w_i \mathbf{J}_i^T \mathbf{J}_i \right)^{-1}

    where :math:`\sigma^2` is the estimated variance of the residuals, computed as
    :math:`\sigma^2 = \frac{2C_0}{N_{eq}-N_p}` with :math:`C_0` the cost of the first
    term (assuming data), :math:`N_{eq}` the number of equations and :math:`N_p` the
    number of parameters.



    Parameters
    ----------
    residual_func: Union[Callable, Sequence[Callable]]
        The function(s) to compute the residuals for each term in the least squares problem.
        Can be a single function or a sequence of functions.
        Each callable represent the `i`-th term :math:`\mathbf{R}_i(\mathbf{p})`
        in the least squares problem and should take as inputs the parameters
        (array-like with shape (n_parameters,)) and return the residuals as a
        1D numpy array with shape (n_residuals_i,) representing each
        :math:`\mathbf{R}_{i,j}(\mathbf{p})` where :math:`j=0,...,n_{\text{residuals}_i-1}`.

    jacobian_func: Union[Callable, Sequence[Callable]]
        The function(s) to compute the Jacobian matrices for each term in the
        least squares problem. Can be a single function or a sequence of functions.
        Each callable represent the `i`-th term :math:`\mathbf{J}_i(\mathbf{p})`
        in the least squares problem such as
        :math:`\mathbf{J}_i(\mathbf{p}) = \frac{\partial \mathbf{R}_i(\mathbf{p})}{\partial \mathbf{p}}`
        and should take as inputs the parameters (array-like with shape (n_parameters,))
        and return the Jacobian matrix as a 2D numpy array with shape
        (n_residuals_i, n_parameters) representing the Jacobian of each residual
        :math:`\mathbf{R}_{i,j}(\mathbf{p})` where :math:`j=0,...,n_{\text{residuals}_i-1}`.

    parameters: ArrayLike
        The parameters of the optimization step with shape (n_parameters,).

    weight: Optional[Union[Real, Sequence[Real]]], optional (default=None)
        The weight(s) for each term in the least squares problem.
        Can be a single value or a sequence of values.
        Each weight :math:`w_i` is a weight for the corresponding term in
        the least squares problem and should be a non-negative real number.
        If a single value is provided, it will be used for all terms in the
        least squares problem. If None is provided, it will be set to 1.0.

    loss: Optional[Union[str, Sequence[str]]], optional (default=None)
        The loss function(s) for each term in the least squares problem.
        Can be a single string or a sequence of strings.
        Each loss function :math:`\rho_i` is a robust cost function for the corresponding
        term in the least squares problem and should be one of the following strings:
        ["linear", "soft_l1", "cauchy", "arctan"].
        If a single string is provided, it will be used for all terms in the least
        squares problem. If None is provided, it will be set to "linear".

    title: str, optional
        An optional title for the analysis, which can be used for logging purposes.
        Default is an empty string.


    Returns
    -------
    None
        This function is used for analysis and does not return any value.


    Version
    -------
    - 0.0.1: Initial version.
    - 0.0.2: :math:`\sigma^2` is now estimated as :math:`\sigma^2 = 2C_0/(N_{eq}-N_p)`
      where :math:`C_0` is the cost of the first term (assuming data) instead of
      the total cost, to provide a more accurate estimation of the residual variance
      for the parameter sensitivity analysis.

    """
    if not isinstance(residual_func, Sequence):
        residual_func = [residual_func]
    if not isinstance(jacobian_func, Sequence):
        jacobian_func = [jacobian_func]

    if not len(residual_func) == len(jacobian_func):
        raise ValueError(
            "The length of residual_func and jacobian_func must be the same."
        )
    if not all(callable(func) for func in residual_func + jacobian_func):
        raise ValueError(
            "All elements in residual_func and jacobian_func must be callable."
        )

    if weight is None:
        weight = 1.0
    if not isinstance(weight, Sequence):
        weight = [weight]
    if len(weight) == 1:
        weight = weight * len(residual_func)
    if not len(weight) == len(residual_func):
        raise ValueError(
            "The length of weight must be the same as the length of residual_func and jacobian_func."
        )
    if not all(isinstance(w, Real) for w in weight):
        raise ValueError("All elements in weight must be real numbers.")
    weight = [float(w) for w in weight]  # Ensure weights are floats

    if loss is None:
        loss = "linear"
    if not isinstance(loss, Sequence):
        loss = [loss]
    loss = [l if l is not None else "linear" for l in loss]  # None -> "linear"
    if len(loss) == 1:
        loss = loss * len(residual_func)
    if not len(loss) == len(residual_func):
        raise ValueError(
            "The length of loss must be the same as the length of residual_func and jacobian_func."
        )
    if not all(isinstance(l, str) for l in loss):
        raise ValueError("All elements in loss must be strings.")
    valid_loss_functions = {"linear", "soft_l1", "cauchy", "arctan"}
    if not all(l in valid_loss_functions for l in loss):
        raise ValueError(f"All elements in loss must be one of {valid_loss_functions}.")

    parameters = numpy.asarray(parameters, dtype=numpy.float64)
    if not isinstance(parameters, numpy.ndarray) and parameters.ndim == 1:
        raise ValueError("Parameters must be a 1D numpy array.")

    if not isinstance(title, str):
        raise ValueError("Title must be a string.")

    # Prepare the robust cost functions for each term in the least squares problem
    rho_func = []
    for l in loss:
        if l == "linear":
            rho_func.append(linear_rho_at_R2)
        elif l == "soft_l1":
            rho_func.append(soft_l1_rho_at_R2)
        elif l == "cauchy":
            rho_func.append(cauchy_rho_at_R2)
        elif l == "arctan":
            rho_func.append(arctan_rho_at_R2)
        else:
            raise ValueError(
                f"Invalid loss function '{l}'. Valid options are 'linear', 'soft_l1', 'cauchy', 'arctan'."
            )

    _n_terms = len(residual_func)
    _n_parameters = parameters.size
    _parameters = parameters.copy()

    # Compute the residuals, Jacobian and robust cost function values for each term in the least squares problem
    residual_arrays = [R_func(_parameters) for R_func in residual_func]
    jacobian_matrices = [J_func(_parameters) for J_func in jacobian_func]
    rhos_arrays = [r_func(R) for r_func, R in zip(rho_func, residual_arrays)]

    # Cost computation
    costs = [0.5 * numpy.sum(rho[0]) for rho in rhos_arrays]
    total_cost = sum(w * c for w, c in zip(weight, costs))

    # Build tilted residuals and Jacobian based on the robust cost function
    for i in range(_n_terms):
        residual_arrays[i], jacobian_matrices[i] = _build_tilde_R_and_tilde_J(
            residual_arrays[i],
            jacobian_matrices[i],
            rhos_arrays[i][1],
            rhos_arrays[i][2],
        )

    # ----------- Title print ----------------
    print("\n" + "=" * 50)
    print("\n" + "-" * 50)
    print(f"{title:^50}")
    print("-" * 50 + "\n")

    # ----------- Global informations -----------
    print(f"Number of terms in the least squares problem: {_n_terms}")
    _n_equations = residual_arrays[0].size
    print(f"Number of equations in the first term (assuming data): {_n_equations}")
    print(f"Number of parameters: {_n_parameters}")
    print(f"\nTotal cost value Ctot = ½Σ w*ρ(|R|²): {total_cost:.3e}")

    # ----------- Sub term contributions -----------
    header = f"\n{'LS Term':^10} {'Nequations':^12} {'Nparams':^10} {'Density (%)':^15} {'Loss ρ':^10} {'Weight w':^10} {'Cost ½ρ(|R|²)':^15} {'Cost (%)':^10}"
    print(header)
    for i, J in enumerate(jacobian_matrices):
        if scipy.sparse.issparse(J):
            n_residuals_i, n_parameters_i = J.shape
            density = 100.0 * J.nnz / (n_residuals_i * n_parameters_i)
        else:
            n_residuals_i, n_parameters_i = J.shape
            density = 100.0 * numpy.count_nonzero(J) / (n_residuals_i * n_parameters_i)
        cost_i = costs[i]
        weight_i = weight[i]
        loss_i = loss[i]
        cost_percent = int(
            round(
                100.0 * cost_i * weight_i / total_cost if total_cost > 0 else "inf", 0
            )
        )
        row = f"{i:^10} {n_residuals_i:^12} {n_parameters_i:^10} {density:^15.2f} {loss_i:^10} {weight_i:^10.2e} {cost_i:^15.3e} {cost_percent:^10.2f}"
        print(row)

    # ----------- Singular values analysis -----------
    M = sum(w * (J.T @ J) for w, J in zip(weight, jacobian_matrices))
    M = M.toarray() if scipy.sparse.issparse(M) else M
    U, S, Vt = numpy.linalg.svd(M, full_matrices=False)
    s_max = S[0]
    s_min = S[-1]
    cond = s_max / s_min if s_min > 1e-12 else float("inf")

    print(f"\nSingular values λ² (max/min) of Σ wJᵀJ : {s_max:.3e}/{s_min:.3e}")
    print(f"Condition number (max λ² / min λ²): {cond:.3e}")
    header = f"\n{'λ² Index':^10} {'λ² Value':^15} {'Var 1/λ²':^15}"
    print(header)
    for i, s in enumerate(S):
        var = 1.0 / s if s > 1e-12 else float("inf")  # Avoid division by zero
        print(f"{i:^10} {s:^15.3e} {var:^15.3e}")

    # ----------- Parameter sensitivity analysis -----------
    sigma_2 = (
        2 * costs[0] / (_n_equations - _n_parameters)
        if _n_equations > _n_parameters
        else float("inf")
    )
    cov = (
        sigma_2 * numpy.linalg.inv(M) if _n_equations > _n_parameters else float("inf")
    )
    print(f"\nVt = (right singular vectors) of the combined Jacobian matrix Σ wJᵀJ:")
    print(f"Estimated residual variance σ² = 2*C0/(Neq-Np): {sigma_2:.3e}")

    indices = [0, -2, -1] if _n_parameters >= 3 else list(range(_n_parameters))
    header = f"\n{'Param Index':^15} {'Value P':^10} {'Var V=σ²M⁻¹':^15} {'Ratio √V/|P|':^15}"
    for i in indices:
        header += f" {'Vt[' + str(i) + ']':^15}"
    print(header)

    for j in range(_n_parameters):
        var = cov[j, j] if _n_equations > _n_parameters else float("inf")
        ratio = (
            numpy.sqrt(var) / abs(parameters[j]) if parameters[j] != 0 else float("inf")
        )
        row = f"{j:^15} {parameters[j]:^10.3e} {var:^15.3e} {ratio:^15.3e}"
        for i in indices:
            row += f" {Vt[i, j]:^15.3e}"
        print(row)

    # ----------- End of analysis -----------
    print("\n" + "=" * 50 + "\n")