Feature/corr matrix and inverse cov matrix as input in least squares function for correlated fits (#223)

* feat: corr_matrix kwargs as input for least squares fit * feat/tests: inverse covariance matrix and correlation matrix kwargs as input for least squares function * feat/tests/example: reduced new kwargs to 'inv_chol_cov_matrix' and outsourced the inversion & cholesky decomposition of the covariance matrix (function 'invert_corr_cov_cholesky(corr, covdiag)') * tests: added tests for inv_chol_cov_matrix kwarg for the case of combined fits * fix: renamed covdiag to inverrdiag needed for the cholesky decomposition and corrected its documentation * examples: added an example of a correlated combined fit to the least_squares documentation * feat/tests/fix(of typos): added function 'sort_corr()' (and a test of it) to sort correlation matrix according to a list of alphabetically sorted keys * docs: added more elaborate documentation/example of sort_corr(), fixed typos in documentation of invert_corr_cov_cholesky()
2025-12-20 22:31:50 +01:00 · 2024-09-13 08:35:10 +02:00 · 2024-09-13 08:35:10 +02:00 · 1d6f7f65c0
commit 1d6f7f65c0
parent 3830e3f777
5 changed files with 596 additions and 46 deletions
--- a/examples/04_fit_example.ipynb
+++ b/examples/04_fit_example.ipynb
--- a/pyerrors/fits.py
+++ b/pyerrors/fits.py
@ -14,7 +14,7 @@ from autograd import hessian as auto_hessian
 from autograd import elementwise_grad as egrad
 from numdifftools import Jacobian as num_jacobian
 from numdifftools import Hessian as num_hessian
-from .obs import Obs, derived_observable, covariance, cov_Obs
+from .obs import Obs, derived_observable, covariance, cov_Obs, invert_corr_cov_cholesky


 class Fit_result(Sequence):
@ -151,6 +151,14 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
        For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
        In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
        This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
+    inv_chol_cov_matrix [array,list], optional
+        array: shape = (no of y values) X (no of y values)
+        list:   for an uncombined fit: [""]
+                for a combined fit: list of keys belonging to the corr_matrix saved in the array, must be the same as the keys of the y dict in alphabetical order
+        If correlated_fit=True is set as well, can provide an inverse covariance matrix (y errors, dy_f included!) of your own choosing for a correlated fit.
+        The matrix must be a lower triangular matrix constructed from a Cholesky decomposition: The function invert_corr_cov_cholesky(corr, inverrdiag) can be
+        used to construct it from a correlation matrix (corr) and the errors dy_f of the data points (inverrdiag = np.diag(1 / np.asarray(dy_f))). For the correct
+        ordering the correlation matrix (corr) can be sorted via the function sort_corr(corr, kl, yd) where kl is the list of keys and yd the y dict.
    expected_chisquare : bool
        If True estimates the expected chisquare which is
        corrected by effects caused by correlated input data (default False).
@ -165,6 +173,57 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
    -------
    output : Fit_result
        Parameters and information on the fitted result.
+    Examples
+    ------
+    >>> # Example of a correlated (correlated_fit = True, inv_chol_cov_matrix handed over) combined fit, based on a randomly generated data set
+    >>> import numpy as np
+    >>> from scipy.stats import norm
+    >>> from scipy.linalg import cholesky
+    >>> import pyerrors as pe
+    >>> # generating the random data set
+    >>> num_samples = 400
+    >>> N = 3
+    >>> x = np.arange(N)
+    >>> x1 = norm.rvs(size=(N, num_samples)) # generate random numbers
+    >>> x2 = norm.rvs(size=(N, num_samples)) # generate random numbers
+    >>> r = r1 = r2 = np.zeros((N, N))
+    >>> y = {}
+    >>> for i in range(N):
+    >>>    for j in range(N):
+    >>>        r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
+    >>> errl = np.sqrt([3.4, 2.5, 3.6]) # set y errors
+    >>> for i in range(N):
+    >>>    for j in range(N):
+    >>>        r[i, j] *= errl[i] * errl[j] # element in covariance matrix
+    >>> c = cholesky(r, lower=True)
+    >>> y = {'a': np.dot(c, x1), 'b': np.dot(c, x2)} # generate y data with the covariance matrix defined
+    >>> # random data set has been generated, now the dictionaries and the inverse covariance matrix to be handed over are built
+    >>> x_dict = {}
+    >>> y_dict = {}
+    >>> chol_inv_dict = {}
+    >>> data = []
+    >>> for key in y.keys():
+    >>>    x_dict[key] = x
+    >>>    for i in range(N):
+    >>>        data.append(pe.Obs([[i + 1 + o for o in y[key][i]]], ['ens'])) # generate y Obs from the y data
+    >>>    [o.gamma_method() for o in data]
+    >>>    corr = pe.covariance(data, correlation=True)
+    >>>    inverrdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
+    >>>    chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag) # gives form of the inverse covariance matrix needed for the combined correlated fit below
+    >>> y_dict = {'a': data[:3], 'b': data[3:]}
+    >>> # common fit parameter p[0] in combined fit
+    >>> def fit1(p, x):
+    >>>    return p[0] + p[1] * x
+    >>> def fit2(p, x):
+    >>>    return p[0] + p[2] * x
+    >>> fitf_dict = {'a': fit1, 'b':fit2}
+    >>> fitp_inv_cov_combined_fit = pe.least_squares(x_dict,y_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,['a','b']])
+    Fit with 3 parameters
+    Method: Levenberg-Marquardt
+    `ftol` termination condition is satisfied.
+    chisquare/d.o.f.: 0.5388013574561786 # random
+    fit parameters [1.11897846 0.96361162 0.92325319] # random
+
    '''
    output = Fit_result()

@ -297,15 +356,19 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
        return anp.sum(general_chisqfunc_uncorr(p, y_f, p_f) ** 2)

    if kwargs.get('correlated_fit') is True:
+        if 'inv_chol_cov_matrix' in kwargs:
+            chol_inv = kwargs.get('inv_chol_cov_matrix')
+            if (chol_inv[0].shape[0] != len(dy_f)):
+                raise TypeError('The number of columns of the inverse covariance matrix handed over needs to be equal to the number of y errors.')
+            if (chol_inv[0].shape[0] != chol_inv[0].shape[1]):
+                raise TypeError('The inverse covariance matrix handed over needs to have the same number of rows as columns.')
+            if (chol_inv[1] != key_ls):
+                raise ValueError('The keys of inverse covariance matrix are not the same or do not appear in the same order as the x and y values.')
+            chol_inv = chol_inv[0]
+        else:
            corr = covariance(y_all, correlation=True, **kwargs)
-        covdiag = np.diag(1 / np.asarray(dy_f))
-        condn = np.linalg.cond(corr)
-        if condn > 0.1 / np.finfo(float).eps:
-            raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
-        if condn > 1e13:
-            warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
-        chol = np.linalg.cholesky(corr)
-        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
+            inverrdiag = np.diag(1 / np.asarray(dy_f))
+            chol_inv = invert_corr_cov_cholesky(corr, inverrdiag)

        def general_chisqfunc(p, ivars, pr):
            model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
@ -350,7 +413,6 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):

        fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
        if kwargs.get('correlated_fit') is True:
-
            def chisqfunc_residuals(p):
                return general_chisqfunc(p, y_f, p_f)

--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@ -1544,6 +1544,92 @@ def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    return cov


+def invert_corr_cov_cholesky(corr, inverrdiag):
+    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
+       and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
+
+    Parameters
+    ----------
+    corr : np.ndarray
+           correlation matrix
+    inverrdiag : np.ndarray
+              diagonal matrix, the entries are the inverse errors of the data points considered
+    """
+
+    condn = np.linalg.cond(corr)
+    if condn > 0.1 / np.finfo(float).eps:
+        raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
+    if condn > 1e13:
+        warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
+    chol = np.linalg.cholesky(corr)
+    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
+
+    return chol_inv
+
+
+def sort_corr(corr, kl, yd):
+    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.
+
+    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
+    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
+    that the correlation matrix is based on.
+    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
+    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
+    to the y data `yd` when arranged in an alphabetical order by its keys.
+
+    Parameters
+    ----------
+    corr : np.ndarray
+        A square correlation matrix constructed using the order of the y data specified by `kl`.
+        The dimensions of `corr` should match the total number of y data points in `yd` combined.
+    kl : list of str
+        A list of keys that denotes the order in which the y data from `yd` was used to build the
+        input correlation matrix `corr`.
+    yd : dict of list
+        A dictionary where each key corresponds to a unique identifier, and its value is a list of
+        y data points. The total number of y data points across all keys must match the dimensions
+        of `corr`. The lists in the dictionary can be lists of Obs.
+
+    Returns
+    -------
+    np.ndarray
+        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
+
+    Example
+    -------
+    >>> import numpy as np
+    >>> import pyerrors as pe
+    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
+    >>> kl = ['b', 'a']
+    >>> yd = {'a': [1, 2], 'b': [3]}
+    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
+    >>> print(sorted_corr)
+    array([[1. , 0.3, 0.4],
+           [0.3, 1. , 0.2],
+           [0.4, 0.2, 1. ]])
+
+    """
+    kl_sorted = sorted(kl)
+
+    posd = {}
+    ofs = 0
+    for ki, k in enumerate(kl):
+        posd[k] = [i + ofs for i in range(len(yd[k]))]
+        ofs += len(posd[k])
+
+    mapping = []
+    for k in kl_sorted:
+        for i in range(len(yd[k])):
+            mapping.append(posd[k][i])
+
+    corr_sorted = np.zeros_like(corr)
+    for i in range(corr.shape[0]):
+        for j in range(corr.shape[0]):
+            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
+
+    return corr_sorted
+
+
 def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

--- a/tests/fits_test.py
+++ b/tests/fits_test.py
@ -152,6 +152,124 @@ def test_alternative_solvers():
    chisquare_values = np.array(chisquare_values)
    assert np.all(np.isclose(chisquare_values, chisquare_values[0]))

+def test_inv_cov_matrix_input_least_squares():
+    
+
+    num_samples = 400
+    N = 10
+
+    x = norm.rvs(size=(N, num_samples)) # generate random numbers
+
+    r = np.zeros((N, N))
+    for i in range(N):
+        for j in range(N):
+            r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
+
+    errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2]) # set y errors
+    for i in range(N):
+        for j in range(N):
+            r[i, j] *= errl[i] * errl[j] # element in covariance matrix
+    
+    c = cholesky(r, lower=True)
+    y = np.dot(c, x)
+    x = np.arange(N)
+    x_dict = {}
+    y_dict = {}
+    for i,item in enumerate(x):
+        x_dict[str(item)] = [x[i]]
+
+    for linear in [True, False]:
+        data = []
+        for i in range(N):
+            if linear:
+                data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
+            else:
+                data.append(pe.Obs([[np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]]], ['ens']))
+
+        [o.gamma_method() for o in data]
+
+        data_dict = {}
+        for i,item in enumerate(x):
+            data_dict[str(item)] = [data[i]]
+
+        corr = pe.covariance(data, correlation=True)
+        chol = np.linalg.cholesky(corr)
+        covdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
+        chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
+        chol_inv_keys = [""]
+        chol_inv_keys_combined_fit = [str(item) for i,item in enumerate(x)]
+
+        if linear:
+            def fitf(p, x):
+                return p[1] + p[0] * x
+            fitf_dict = {}
+            for i,item in enumerate(x):
+                fitf_dict[str(item)] = fitf
+        else:
+            def fitf(p, x):
+                return p[1] * anp.exp(-p[0] * x)
+            fitf_dict = {}
+            for i,item in enumerate(x):
+                fitf_dict[str(item)] = fitf
+
+        fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
+        fitp_inv_cov = pe.least_squares(x, data, fitf,  correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys])
+        fitp_inv_cov_combined_fit = pe.least_squares(x_dict, data_dict, fitf_dict,  correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys_combined_fit])
+        for i in range(2):
+            diff_inv_cov = fitp_inv_cov[i] - fitpc[i]
+            diff_inv_cov.gamma_method()
+            assert(diff_inv_cov.is_zero(atol=0.0))
+            diff_inv_cov_combined_fit = fitp_inv_cov_combined_fit[i] - fitpc[i]
+            diff_inv_cov_combined_fit.gamma_method()
+            assert(diff_inv_cov_combined_fit.is_zero(atol=1e-12))
+
+def test_least_squares_invalid_inv_cov_matrix_input():
+    xvals = []
+    yvals = []
+    err = 0.1
+    def func_valid(a,x):
+        return a[0] + a[1] * x
+    for x in range(1, 8, 2):
+        xvals.append(x)
+        yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))
+
+    [o.gamma_method() for o in yvals]
+    
+    #dictionaries for a combined fit
+    xvals_dict = { }
+    yvals_dict = { }
+    for i,item in enumerate(np.arange(1, 8, 2)):
+        xvals_dict[str(item)] = [xvals[i]]
+        yvals_dict[str(item)] = [yvals[i]]
+    chol_inv_keys_combined_fit = ['1', '3', '5', '7']
+    chol_inv_keys_combined_fit_invalid = ['2', '7', '100', '8']
+    func_dict_valid = {"1": func_valid,"3": func_valid,"5": func_valid,"7": func_valid}
+
+    corr_valid = pe.covariance(yvals, correlation = True)
+    chol = np.linalg.cholesky(corr_valid)
+    covdiag = np.diag(1 / np.asarray([o.dvalue for o in yvals]))
+    chol_inv_valid = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
+    chol_inv_keys = [""]
+    pe.least_squares(xvals, yvals,func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys])
+    pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit])
+    chol_inv_invalid_shape1 = np.zeros((len(yvals),len(yvals)-1))
+    chol_inv_invalid_shape2 = np.zeros((len(yvals)+2,len(yvals)))
+
+    # for an uncombined fit
+    with pytest.raises(TypeError):
+        pe.least_squares(xvals, yvals, func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys])
+    with pytest.raises(TypeError):
+        pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys])
+    with pytest.raises(ValueError):
+        pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])
+
+    #repeat for a combined fit
+    with pytest.raises(TypeError):
+        pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys_combined_fit])
+    with pytest.raises(TypeError):
+        pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys_combined_fit])
+    with pytest.raises(ValueError):
+        pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])

 def test_correlated_fit():
    num_samples = 400
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@ -1063,6 +1063,27 @@ def test_covariance_reorder_non_overlapping_data():
    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)


+def test_sort_corr():
+    xd = {
+    'b': [1, 2, 3],
+    'a': [2.2, 4.4],
+    'c': [3.7, 5.1]
+    }
+
+    yd = {k : pe.cov_Obs(xd[k], [.2 * o for o in xd[k]], k) for k in xd}
+    key_orig = list(yd.keys())
+    y_all = np.concatenate([np.array(yd[key]) for key in key_orig])
+    [o.gm() for o in y_all]
+    cov = pe.covariance(y_all)
+
+    key_ls = key_sorted = sorted(key_orig)
+    y_sorted = np.concatenate([np.array(yd[key]) for key in key_sorted])
+    [o.gm() for o in y_sorted]
+    cov_sorted = pe.covariance(y_sorted)
+    retcov = pe.obs.sort_corr(cov, key_orig, yd)
+    assert np.sum(retcov - cov_sorted) == 0
+
+
 def test_empty_obs():
    o = pe.Obs([np.random.rand(100)], ['test'])
    q = o + pe.Obs([], [], means=[])