1
0
Fork 0
mirror of https://github.com/fjosw/pyerrors.git synced 2025-03-15 14:50:25 +01:00

Feature/corr matrix and inverse cov matrix as input in least squares function for correlated fits ()

* feat: corr_matrix kwargs as input for least squares fit

* feat/tests: inverse covariance matrix and correlation matrix kwargs as input for least squares function

* feat/tests/example: reduced new kwargs to 'inv_chol_cov_matrix' and outsourced the inversion & cholesky decomposition of the covariance matrix (function 'invert_corr_cov_cholesky(corr, covdiag)')

* tests: added tests for inv_chol_cov_matrix kwarg for the case of combined fits

* fix: renamed covdiag to inverrdiag needed for the cholesky decomposition and corrected its documentation

* examples: added an example of a correlated combined fit to the least_squares documentation

* feat/tests/fix(of typos): added function 'sort_corr()' (and a test of it) to sort correlation matrix according to a list of alphabetically sorted keys

* docs: added more elaborate documentation/example of sort_corr(), fixed typos in documentation of invert_corr_cov_cholesky()
This commit is contained in:
Pia Leonie Jones Petrak 2024-09-13 08:35:10 +02:00 committed by GitHub
parent 3830e3f777
commit 1d6f7f65c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 596 additions and 46 deletions

File diff suppressed because one or more lines are too long

View file

@ -14,7 +14,7 @@ from autograd import hessian as auto_hessian
from autograd import elementwise_grad as egrad
from numdifftools import Jacobian as num_jacobian
from numdifftools import Hessian as num_hessian
from .obs import Obs, derived_observable, covariance, cov_Obs
from .obs import Obs, derived_observable, covariance, cov_Obs, invert_corr_cov_cholesky
class Fit_result(Sequence):
@ -151,6 +151,14 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
inv_chol_cov_matrix [array,list], optional
array: shape = (no of y values) X (no of y values)
list: for an uncombined fit: [""]
for a combined fit: list of keys belonging to the corr_matrix saved in the array, must be the same as the keys of the y dict in alphabetical order
If correlated_fit=True is set as well, can provide an inverse covariance matrix (y errors, dy_f included!) of your own choosing for a correlated fit.
The matrix must be a lower triangular matrix constructed from a Cholesky decomposition: The function invert_corr_cov_cholesky(corr, inverrdiag) can be
used to construct it from a correlation matrix (corr) and the errors dy_f of the data points (inverrdiag = np.diag(1 / np.asarray(dy_f))). For the correct
ordering the correlation matrix (corr) can be sorted via the function sort_corr(corr, kl, yd) where kl is the list of keys and yd the y dict.
expected_chisquare : bool
If True estimates the expected chisquare which is
corrected by effects caused by correlated input data (default False).
@ -165,6 +173,57 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
-------
output : Fit_result
Parameters and information on the fitted result.
Examples
------
>>> # Example of a correlated (correlated_fit = True, inv_chol_cov_matrix handed over) combined fit, based on a randomly generated data set
>>> import numpy as np
>>> from scipy.stats import norm
>>> from scipy.linalg import cholesky
>>> import pyerrors as pe
>>> # generating the random data set
>>> num_samples = 400
>>> N = 3
>>> x = np.arange(N)
>>> x1 = norm.rvs(size=(N, num_samples)) # generate random numbers
>>> x2 = norm.rvs(size=(N, num_samples)) # generate random numbers
>>> r = r1 = r2 = np.zeros((N, N))
>>> y = {}
>>> for i in range(N):
>>> for j in range(N):
>>> r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
>>> errl = np.sqrt([3.4, 2.5, 3.6]) # set y errors
>>> for i in range(N):
>>> for j in range(N):
>>> r[i, j] *= errl[i] * errl[j] # element in covariance matrix
>>> c = cholesky(r, lower=True)
>>> y = {'a': np.dot(c, x1), 'b': np.dot(c, x2)} # generate y data with the covariance matrix defined
>>> # random data set has been generated, now the dictionaries and the inverse covariance matrix to be handed over are built
>>> x_dict = {}
>>> y_dict = {}
>>> chol_inv_dict = {}
>>> data = []
>>> for key in y.keys():
>>> x_dict[key] = x
>>> for i in range(N):
>>> data.append(pe.Obs([[i + 1 + o for o in y[key][i]]], ['ens'])) # generate y Obs from the y data
>>> [o.gamma_method() for o in data]
>>> corr = pe.covariance(data, correlation=True)
>>> inverrdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
>>> chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag) # gives form of the inverse covariance matrix needed for the combined correlated fit below
>>> y_dict = {'a': data[:3], 'b': data[3:]}
>>> # common fit parameter p[0] in combined fit
>>> def fit1(p, x):
>>> return p[0] + p[1] * x
>>> def fit2(p, x):
>>> return p[0] + p[2] * x
>>> fitf_dict = {'a': fit1, 'b':fit2}
>>> fitp_inv_cov_combined_fit = pe.least_squares(x_dict,y_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,['a','b']])
Fit with 3 parameters
Method: Levenberg-Marquardt
`ftol` termination condition is satisfied.
chisquare/d.o.f.: 0.5388013574561786 # random
fit parameters [1.11897846 0.96361162 0.92325319] # random
'''
output = Fit_result()
@ -297,15 +356,19 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
return anp.sum(general_chisqfunc_uncorr(p, y_f, p_f) ** 2)
if kwargs.get('correlated_fit') is True:
if 'inv_chol_cov_matrix' in kwargs:
chol_inv = kwargs.get('inv_chol_cov_matrix')
if (chol_inv[0].shape[0] != len(dy_f)):
raise TypeError('The number of columns of the inverse covariance matrix handed over needs to be equal to the number of y errors.')
if (chol_inv[0].shape[0] != chol_inv[0].shape[1]):
raise TypeError('The inverse covariance matrix handed over needs to have the same number of rows as columns.')
if (chol_inv[1] != key_ls):
raise ValueError('The keys of inverse covariance matrix are not the same or do not appear in the same order as the x and y values.')
chol_inv = chol_inv[0]
else:
corr = covariance(y_all, correlation=True, **kwargs)
covdiag = np.diag(1 / np.asarray(dy_f))
condn = np.linalg.cond(corr)
if condn > 0.1 / np.finfo(float).eps:
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
if condn > 1e13:
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
chol = np.linalg.cholesky(corr)
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
inverrdiag = np.diag(1 / np.asarray(dy_f))
chol_inv = invert_corr_cov_cholesky(corr, inverrdiag)
def general_chisqfunc(p, ivars, pr):
model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
@ -350,7 +413,6 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
if kwargs.get('correlated_fit') is True:
def chisqfunc_residuals(p):
return general_chisqfunc(p, y_f, p_f)

View file

@ -1544,6 +1544,92 @@ def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
return cov
def invert_corr_cov_cholesky(corr, inverrdiag):
"""Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
Parameters
----------
corr : np.ndarray
correlation matrix
inverrdiag : np.ndarray
diagonal matrix, the entries are the inverse errors of the data points considered
"""
condn = np.linalg.cond(corr)
if condn > 0.1 / np.finfo(float).eps:
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
if condn > 1e13:
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
chol = np.linalg.cholesky(corr)
chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
return chol_inv
def sort_corr(corr, kl, yd):
""" Reorders a correlation matrix to match the alphabetical order of its underlying y data.
The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
that the correlation matrix is based on.
This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
to the y data `yd` when arranged in an alphabetical order by its keys.
Parameters
----------
corr : np.ndarray
A square correlation matrix constructed using the order of the y data specified by `kl`.
The dimensions of `corr` should match the total number of y data points in `yd` combined.
kl : list of str
A list of keys that denotes the order in which the y data from `yd` was used to build the
input correlation matrix `corr`.
yd : dict of list
A dictionary where each key corresponds to a unique identifier, and its value is a list of
y data points. The total number of y data points across all keys must match the dimensions
of `corr`. The lists in the dictionary can be lists of Obs.
Returns
-------
np.ndarray
A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
Example
-------
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
[0.3, 1. , 0.2],
[0.4, 0.2, 1. ]])
"""
kl_sorted = sorted(kl)
posd = {}
ofs = 0
for ki, k in enumerate(kl):
posd[k] = [i + ofs for i in range(len(yd[k]))]
ofs += len(posd[k])
mapping = []
for k in kl_sorted:
for i in range(len(yd[k])):
mapping.append(posd[k][i])
corr_sorted = np.zeros_like(corr)
for i in range(corr.shape[0]):
for j in range(corr.shape[0]):
corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
return corr_sorted
def _smooth_eigenvalues(corr, E):
"""Eigenvalue smoothing as described in hep-lat/9412087

View file

@ -152,6 +152,124 @@ def test_alternative_solvers():
chisquare_values = np.array(chisquare_values)
assert np.all(np.isclose(chisquare_values, chisquare_values[0]))
def test_inv_cov_matrix_input_least_squares():
num_samples = 400
N = 10
x = norm.rvs(size=(N, num_samples)) # generate random numbers
r = np.zeros((N, N))
for i in range(N):
for j in range(N):
r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2]) # set y errors
for i in range(N):
for j in range(N):
r[i, j] *= errl[i] * errl[j] # element in covariance matrix
c = cholesky(r, lower=True)
y = np.dot(c, x)
x = np.arange(N)
x_dict = {}
y_dict = {}
for i,item in enumerate(x):
x_dict[str(item)] = [x[i]]
for linear in [True, False]:
data = []
for i in range(N):
if linear:
data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
else:
data.append(pe.Obs([[np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]]], ['ens']))
[o.gamma_method() for o in data]
data_dict = {}
for i,item in enumerate(x):
data_dict[str(item)] = [data[i]]
corr = pe.covariance(data, correlation=True)
chol = np.linalg.cholesky(corr)
covdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
chol_inv_keys = [""]
chol_inv_keys_combined_fit = [str(item) for i,item in enumerate(x)]
if linear:
def fitf(p, x):
return p[1] + p[0] * x
fitf_dict = {}
for i,item in enumerate(x):
fitf_dict[str(item)] = fitf
else:
def fitf(p, x):
return p[1] * anp.exp(-p[0] * x)
fitf_dict = {}
for i,item in enumerate(x):
fitf_dict[str(item)] = fitf
fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
fitp_inv_cov = pe.least_squares(x, data, fitf, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys])
fitp_inv_cov_combined_fit = pe.least_squares(x_dict, data_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys_combined_fit])
for i in range(2):
diff_inv_cov = fitp_inv_cov[i] - fitpc[i]
diff_inv_cov.gamma_method()
assert(diff_inv_cov.is_zero(atol=0.0))
diff_inv_cov_combined_fit = fitp_inv_cov_combined_fit[i] - fitpc[i]
diff_inv_cov_combined_fit.gamma_method()
assert(diff_inv_cov_combined_fit.is_zero(atol=1e-12))
def test_least_squares_invalid_inv_cov_matrix_input():
xvals = []
yvals = []
err = 0.1
def func_valid(a,x):
return a[0] + a[1] * x
for x in range(1, 8, 2):
xvals.append(x)
yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))
[o.gamma_method() for o in yvals]
#dictionaries for a combined fit
xvals_dict = { }
yvals_dict = { }
for i,item in enumerate(np.arange(1, 8, 2)):
xvals_dict[str(item)] = [xvals[i]]
yvals_dict[str(item)] = [yvals[i]]
chol_inv_keys_combined_fit = ['1', '3', '5', '7']
chol_inv_keys_combined_fit_invalid = ['2', '7', '100', '8']
func_dict_valid = {"1": func_valid,"3": func_valid,"5": func_valid,"7": func_valid}
corr_valid = pe.covariance(yvals, correlation = True)
chol = np.linalg.cholesky(corr_valid)
covdiag = np.diag(1 / np.asarray([o.dvalue for o in yvals]))
chol_inv_valid = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
chol_inv_keys = [""]
pe.least_squares(xvals, yvals,func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys])
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit])
chol_inv_invalid_shape1 = np.zeros((len(yvals),len(yvals)-1))
chol_inv_invalid_shape2 = np.zeros((len(yvals)+2,len(yvals)))
# for an uncombined fit
with pytest.raises(TypeError):
pe.least_squares(xvals, yvals, func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys])
with pytest.raises(TypeError):
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys])
with pytest.raises(ValueError):
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])
#repeat for a combined fit
with pytest.raises(TypeError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys_combined_fit])
with pytest.raises(TypeError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys_combined_fit])
with pytest.raises(ValueError):
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])
def test_correlated_fit():
num_samples = 400

View file

@ -1063,6 +1063,27 @@ def test_covariance_reorder_non_overlapping_data():
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
def test_sort_corr():
xd = {
'b': [1, 2, 3],
'a': [2.2, 4.4],
'c': [3.7, 5.1]
}
yd = {k : pe.cov_Obs(xd[k], [.2 * o for o in xd[k]], k) for k in xd}
key_orig = list(yd.keys())
y_all = np.concatenate([np.array(yd[key]) for key in key_orig])
[o.gm() for o in y_all]
cov = pe.covariance(y_all)
key_ls = key_sorted = sorted(key_orig)
y_sorted = np.concatenate([np.array(yd[key]) for key in key_sorted])
[o.gm() for o in y_sorted]
cov_sorted = pe.covariance(y_sorted)
retcov = pe.obs.sort_corr(cov, key_orig, yd)
assert np.sum(retcov - cov_sorted) == 0
def test_empty_obs():
o = pe.Obs([np.random.rand(100)], ['test'])
q = o + pe.Obs([], [], means=[])