mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-03-15 06:40:24 +01:00
Feature/corr matrix and inverse cov matrix as input in least squares function for correlated fits (#223)
* feat: corr_matrix kwargs as input for least squares fit * feat/tests: inverse covariance matrix and correlation matrix kwargs as input for least squares function * feat/tests/example: reduced new kwargs to 'inv_chol_cov_matrix' and outsourced the inversion & cholesky decomposition of the covariance matrix (function 'invert_corr_cov_cholesky(corr, covdiag)') * tests: added tests for inv_chol_cov_matrix kwarg for the case of combined fits * fix: renamed covdiag to inverrdiag needed for the cholesky decomposition and corrected its documentation * examples: added an example of a correlated combined fit to the least_squares documentation * feat/tests/fix(of typos): added function 'sort_corr()' (and a test of it) to sort correlation matrix according to a list of alphabetically sorted keys * docs: added more elaborate documentation/example of sort_corr(), fixed typos in documentation of invert_corr_cov_cholesky()
This commit is contained in:
parent
3830e3f777
commit
1d6f7f65c0
5 changed files with 596 additions and 46 deletions
File diff suppressed because one or more lines are too long
|
@ -14,7 +14,7 @@ from autograd import hessian as auto_hessian
|
|||
from autograd import elementwise_grad as egrad
|
||||
from numdifftools import Jacobian as num_jacobian
|
||||
from numdifftools import Hessian as num_hessian
|
||||
from .obs import Obs, derived_observable, covariance, cov_Obs
|
||||
from .obs import Obs, derived_observable, covariance, cov_Obs, invert_corr_cov_cholesky
|
||||
|
||||
|
||||
class Fit_result(Sequence):
|
||||
|
@ -151,6 +151,14 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
|
|||
For details about how the covariance matrix is estimated see `pyerrors.obs.covariance`.
|
||||
In practice the correlation matrix is Cholesky decomposed and inverted (instead of the covariance matrix).
|
||||
This procedure should be numerically more stable as the correlation matrix is typically better conditioned (Jacobi preconditioning).
|
||||
inv_chol_cov_matrix [array,list], optional
|
||||
array: shape = (no of y values) X (no of y values)
|
||||
list: for an uncombined fit: [""]
|
||||
for a combined fit: list of keys belonging to the corr_matrix saved in the array, must be the same as the keys of the y dict in alphabetical order
|
||||
If correlated_fit=True is set as well, can provide an inverse covariance matrix (y errors, dy_f included!) of your own choosing for a correlated fit.
|
||||
The matrix must be a lower triangular matrix constructed from a Cholesky decomposition: The function invert_corr_cov_cholesky(corr, inverrdiag) can be
|
||||
used to construct it from a correlation matrix (corr) and the errors dy_f of the data points (inverrdiag = np.diag(1 / np.asarray(dy_f))). For the correct
|
||||
ordering the correlation matrix (corr) can be sorted via the function sort_corr(corr, kl, yd) where kl is the list of keys and yd the y dict.
|
||||
expected_chisquare : bool
|
||||
If True estimates the expected chisquare which is
|
||||
corrected by effects caused by correlated input data (default False).
|
||||
|
@ -165,6 +173,57 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
|
|||
-------
|
||||
output : Fit_result
|
||||
Parameters and information on the fitted result.
|
||||
Examples
|
||||
------
|
||||
>>> # Example of a correlated (correlated_fit = True, inv_chol_cov_matrix handed over) combined fit, based on a randomly generated data set
|
||||
>>> import numpy as np
|
||||
>>> from scipy.stats import norm
|
||||
>>> from scipy.linalg import cholesky
|
||||
>>> import pyerrors as pe
|
||||
>>> # generating the random data set
|
||||
>>> num_samples = 400
|
||||
>>> N = 3
|
||||
>>> x = np.arange(N)
|
||||
>>> x1 = norm.rvs(size=(N, num_samples)) # generate random numbers
|
||||
>>> x2 = norm.rvs(size=(N, num_samples)) # generate random numbers
|
||||
>>> r = r1 = r2 = np.zeros((N, N))
|
||||
>>> y = {}
|
||||
>>> for i in range(N):
|
||||
>>> for j in range(N):
|
||||
>>> r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
|
||||
>>> errl = np.sqrt([3.4, 2.5, 3.6]) # set y errors
|
||||
>>> for i in range(N):
|
||||
>>> for j in range(N):
|
||||
>>> r[i, j] *= errl[i] * errl[j] # element in covariance matrix
|
||||
>>> c = cholesky(r, lower=True)
|
||||
>>> y = {'a': np.dot(c, x1), 'b': np.dot(c, x2)} # generate y data with the covariance matrix defined
|
||||
>>> # random data set has been generated, now the dictionaries and the inverse covariance matrix to be handed over are built
|
||||
>>> x_dict = {}
|
||||
>>> y_dict = {}
|
||||
>>> chol_inv_dict = {}
|
||||
>>> data = []
|
||||
>>> for key in y.keys():
|
||||
>>> x_dict[key] = x
|
||||
>>> for i in range(N):
|
||||
>>> data.append(pe.Obs([[i + 1 + o for o in y[key][i]]], ['ens'])) # generate y Obs from the y data
|
||||
>>> [o.gamma_method() for o in data]
|
||||
>>> corr = pe.covariance(data, correlation=True)
|
||||
>>> inverrdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
|
||||
>>> chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag) # gives form of the inverse covariance matrix needed for the combined correlated fit below
|
||||
>>> y_dict = {'a': data[:3], 'b': data[3:]}
|
||||
>>> # common fit parameter p[0] in combined fit
|
||||
>>> def fit1(p, x):
|
||||
>>> return p[0] + p[1] * x
|
||||
>>> def fit2(p, x):
|
||||
>>> return p[0] + p[2] * x
|
||||
>>> fitf_dict = {'a': fit1, 'b':fit2}
|
||||
>>> fitp_inv_cov_combined_fit = pe.least_squares(x_dict,y_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,['a','b']])
|
||||
Fit with 3 parameters
|
||||
Method: Levenberg-Marquardt
|
||||
`ftol` termination condition is satisfied.
|
||||
chisquare/d.o.f.: 0.5388013574561786 # random
|
||||
fit parameters [1.11897846 0.96361162 0.92325319] # random
|
||||
|
||||
'''
|
||||
output = Fit_result()
|
||||
|
||||
|
@ -297,15 +356,19 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
|
|||
return anp.sum(general_chisqfunc_uncorr(p, y_f, p_f) ** 2)
|
||||
|
||||
if kwargs.get('correlated_fit') is True:
|
||||
if 'inv_chol_cov_matrix' in kwargs:
|
||||
chol_inv = kwargs.get('inv_chol_cov_matrix')
|
||||
if (chol_inv[0].shape[0] != len(dy_f)):
|
||||
raise TypeError('The number of columns of the inverse covariance matrix handed over needs to be equal to the number of y errors.')
|
||||
if (chol_inv[0].shape[0] != chol_inv[0].shape[1]):
|
||||
raise TypeError('The inverse covariance matrix handed over needs to have the same number of rows as columns.')
|
||||
if (chol_inv[1] != key_ls):
|
||||
raise ValueError('The keys of inverse covariance matrix are not the same or do not appear in the same order as the x and y values.')
|
||||
chol_inv = chol_inv[0]
|
||||
else:
|
||||
corr = covariance(y_all, correlation=True, **kwargs)
|
||||
covdiag = np.diag(1 / np.asarray(dy_f))
|
||||
condn = np.linalg.cond(corr)
|
||||
if condn > 0.1 / np.finfo(float).eps:
|
||||
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
|
||||
if condn > 1e13:
|
||||
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
|
||||
chol = np.linalg.cholesky(corr)
|
||||
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
|
||||
inverrdiag = np.diag(1 / np.asarray(dy_f))
|
||||
chol_inv = invert_corr_cov_cholesky(corr, inverrdiag)
|
||||
|
||||
def general_chisqfunc(p, ivars, pr):
|
||||
model = anp.concatenate([anp.array(funcd[key](p, xd[key])).reshape(-1) for key in key_ls])
|
||||
|
@ -350,7 +413,6 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs):
|
|||
|
||||
fit_result = scipy.optimize.least_squares(chisqfunc_residuals_uncorr, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
|
||||
if kwargs.get('correlated_fit') is True:
|
||||
|
||||
def chisqfunc_residuals(p):
|
||||
return general_chisqfunc(p, y_f, p_f)
|
||||
|
||||
|
|
|
@ -1544,6 +1544,92 @@ def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
|
|||
return cov
|
||||
|
||||
|
||||
def invert_corr_cov_cholesky(corr, inverrdiag):
|
||||
"""Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
|
||||
and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
corr : np.ndarray
|
||||
correlation matrix
|
||||
inverrdiag : np.ndarray
|
||||
diagonal matrix, the entries are the inverse errors of the data points considered
|
||||
"""
|
||||
|
||||
condn = np.linalg.cond(corr)
|
||||
if condn > 0.1 / np.finfo(float).eps:
|
||||
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
|
||||
if condn > 1e13:
|
||||
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
|
||||
chol = np.linalg.cholesky(corr)
|
||||
chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)
|
||||
|
||||
return chol_inv
|
||||
|
||||
|
||||
def sort_corr(corr, kl, yd):
|
||||
""" Reorders a correlation matrix to match the alphabetical order of its underlying y data.
|
||||
|
||||
The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
|
||||
The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
|
||||
that the correlation matrix is based on.
|
||||
This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
|
||||
according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
|
||||
to the y data `yd` when arranged in an alphabetical order by its keys.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
corr : np.ndarray
|
||||
A square correlation matrix constructed using the order of the y data specified by `kl`.
|
||||
The dimensions of `corr` should match the total number of y data points in `yd` combined.
|
||||
kl : list of str
|
||||
A list of keys that denotes the order in which the y data from `yd` was used to build the
|
||||
input correlation matrix `corr`.
|
||||
yd : dict of list
|
||||
A dictionary where each key corresponds to a unique identifier, and its value is a list of
|
||||
y data points. The total number of y data points across all keys must match the dimensions
|
||||
of `corr`. The lists in the dictionary can be lists of Obs.
|
||||
|
||||
Returns
|
||||
-------
|
||||
np.ndarray
|
||||
A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.
|
||||
|
||||
Example
|
||||
-------
|
||||
>>> import numpy as np
|
||||
>>> import pyerrors as pe
|
||||
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
|
||||
>>> kl = ['b', 'a']
|
||||
>>> yd = {'a': [1, 2], 'b': [3]}
|
||||
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
|
||||
>>> print(sorted_corr)
|
||||
array([[1. , 0.3, 0.4],
|
||||
[0.3, 1. , 0.2],
|
||||
[0.4, 0.2, 1. ]])
|
||||
|
||||
"""
|
||||
kl_sorted = sorted(kl)
|
||||
|
||||
posd = {}
|
||||
ofs = 0
|
||||
for ki, k in enumerate(kl):
|
||||
posd[k] = [i + ofs for i in range(len(yd[k]))]
|
||||
ofs += len(posd[k])
|
||||
|
||||
mapping = []
|
||||
for k in kl_sorted:
|
||||
for i in range(len(yd[k])):
|
||||
mapping.append(posd[k][i])
|
||||
|
||||
corr_sorted = np.zeros_like(corr)
|
||||
for i in range(corr.shape[0]):
|
||||
for j in range(corr.shape[0]):
|
||||
corr_sorted[i][j] = corr[mapping[i]][mapping[j]]
|
||||
|
||||
return corr_sorted
|
||||
|
||||
|
||||
def _smooth_eigenvalues(corr, E):
|
||||
"""Eigenvalue smoothing as described in hep-lat/9412087
|
||||
|
||||
|
|
|
@ -152,6 +152,124 @@ def test_alternative_solvers():
|
|||
chisquare_values = np.array(chisquare_values)
|
||||
assert np.all(np.isclose(chisquare_values, chisquare_values[0]))
|
||||
|
||||
def test_inv_cov_matrix_input_least_squares():
|
||||
|
||||
|
||||
num_samples = 400
|
||||
N = 10
|
||||
|
||||
x = norm.rvs(size=(N, num_samples)) # generate random numbers
|
||||
|
||||
r = np.zeros((N, N))
|
||||
for i in range(N):
|
||||
for j in range(N):
|
||||
r[i, j] = np.exp(-0.8 * np.fabs(i - j)) # element in correlation matrix
|
||||
|
||||
errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2]) # set y errors
|
||||
for i in range(N):
|
||||
for j in range(N):
|
||||
r[i, j] *= errl[i] * errl[j] # element in covariance matrix
|
||||
|
||||
c = cholesky(r, lower=True)
|
||||
y = np.dot(c, x)
|
||||
x = np.arange(N)
|
||||
x_dict = {}
|
||||
y_dict = {}
|
||||
for i,item in enumerate(x):
|
||||
x_dict[str(item)] = [x[i]]
|
||||
|
||||
for linear in [True, False]:
|
||||
data = []
|
||||
for i in range(N):
|
||||
if linear:
|
||||
data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens']))
|
||||
else:
|
||||
data.append(pe.Obs([[np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]]], ['ens']))
|
||||
|
||||
[o.gamma_method() for o in data]
|
||||
|
||||
data_dict = {}
|
||||
for i,item in enumerate(x):
|
||||
data_dict[str(item)] = [data[i]]
|
||||
|
||||
corr = pe.covariance(data, correlation=True)
|
||||
chol = np.linalg.cholesky(corr)
|
||||
covdiag = np.diag(1 / np.asarray([o.dvalue for o in data]))
|
||||
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
|
||||
chol_inv_keys = [""]
|
||||
chol_inv_keys_combined_fit = [str(item) for i,item in enumerate(x)]
|
||||
|
||||
if linear:
|
||||
def fitf(p, x):
|
||||
return p[1] + p[0] * x
|
||||
fitf_dict = {}
|
||||
for i,item in enumerate(x):
|
||||
fitf_dict[str(item)] = fitf
|
||||
else:
|
||||
def fitf(p, x):
|
||||
return p[1] * anp.exp(-p[0] * x)
|
||||
fitf_dict = {}
|
||||
for i,item in enumerate(x):
|
||||
fitf_dict[str(item)] = fitf
|
||||
|
||||
fitpc = pe.least_squares(x, data, fitf, correlated_fit=True)
|
||||
fitp_inv_cov = pe.least_squares(x, data, fitf, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys])
|
||||
fitp_inv_cov_combined_fit = pe.least_squares(x_dict, data_dict, fitf_dict, correlated_fit = True, inv_chol_cov_matrix = [chol_inv,chol_inv_keys_combined_fit])
|
||||
for i in range(2):
|
||||
diff_inv_cov = fitp_inv_cov[i] - fitpc[i]
|
||||
diff_inv_cov.gamma_method()
|
||||
assert(diff_inv_cov.is_zero(atol=0.0))
|
||||
diff_inv_cov_combined_fit = fitp_inv_cov_combined_fit[i] - fitpc[i]
|
||||
diff_inv_cov_combined_fit.gamma_method()
|
||||
assert(diff_inv_cov_combined_fit.is_zero(atol=1e-12))
|
||||
|
||||
def test_least_squares_invalid_inv_cov_matrix_input():
|
||||
xvals = []
|
||||
yvals = []
|
||||
err = 0.1
|
||||
def func_valid(a,x):
|
||||
return a[0] + a[1] * x
|
||||
for x in range(1, 8, 2):
|
||||
xvals.append(x)
|
||||
yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))
|
||||
|
||||
[o.gamma_method() for o in yvals]
|
||||
|
||||
#dictionaries for a combined fit
|
||||
xvals_dict = { }
|
||||
yvals_dict = { }
|
||||
for i,item in enumerate(np.arange(1, 8, 2)):
|
||||
xvals_dict[str(item)] = [xvals[i]]
|
||||
yvals_dict[str(item)] = [yvals[i]]
|
||||
chol_inv_keys_combined_fit = ['1', '3', '5', '7']
|
||||
chol_inv_keys_combined_fit_invalid = ['2', '7', '100', '8']
|
||||
func_dict_valid = {"1": func_valid,"3": func_valid,"5": func_valid,"7": func_valid}
|
||||
|
||||
corr_valid = pe.covariance(yvals, correlation = True)
|
||||
chol = np.linalg.cholesky(corr_valid)
|
||||
covdiag = np.diag(1 / np.asarray([o.dvalue for o in yvals]))
|
||||
chol_inv_valid = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
|
||||
chol_inv_keys = [""]
|
||||
pe.least_squares(xvals, yvals,func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys])
|
||||
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit])
|
||||
chol_inv_invalid_shape1 = np.zeros((len(yvals),len(yvals)-1))
|
||||
chol_inv_invalid_shape2 = np.zeros((len(yvals)+2,len(yvals)))
|
||||
|
||||
# for an uncombined fit
|
||||
with pytest.raises(TypeError):
|
||||
pe.least_squares(xvals, yvals, func_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys])
|
||||
with pytest.raises(TypeError):
|
||||
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys])
|
||||
with pytest.raises(ValueError):
|
||||
pe.least_squares(xvals, yvals, func_valid,correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])
|
||||
|
||||
#repeat for a combined fit
|
||||
with pytest.raises(TypeError):
|
||||
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape1,chol_inv_keys_combined_fit])
|
||||
with pytest.raises(TypeError):
|
||||
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_invalid_shape2,chol_inv_keys_combined_fit])
|
||||
with pytest.raises(ValueError):
|
||||
pe.least_squares(xvals_dict, yvals_dict,func_dict_valid, correlated_fit = True, inv_chol_cov_matrix = [chol_inv_valid,chol_inv_keys_combined_fit_invalid])
|
||||
|
||||
def test_correlated_fit():
|
||||
num_samples = 400
|
||||
|
|
|
@ -1063,6 +1063,27 @@ def test_covariance_reorder_non_overlapping_data():
|
|||
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
|
||||
|
||||
|
||||
def test_sort_corr():
|
||||
xd = {
|
||||
'b': [1, 2, 3],
|
||||
'a': [2.2, 4.4],
|
||||
'c': [3.7, 5.1]
|
||||
}
|
||||
|
||||
yd = {k : pe.cov_Obs(xd[k], [.2 * o for o in xd[k]], k) for k in xd}
|
||||
key_orig = list(yd.keys())
|
||||
y_all = np.concatenate([np.array(yd[key]) for key in key_orig])
|
||||
[o.gm() for o in y_all]
|
||||
cov = pe.covariance(y_all)
|
||||
|
||||
key_ls = key_sorted = sorted(key_orig)
|
||||
y_sorted = np.concatenate([np.array(yd[key]) for key in key_sorted])
|
||||
[o.gm() for o in y_sorted]
|
||||
cov_sorted = pe.covariance(y_sorted)
|
||||
retcov = pe.obs.sort_corr(cov, key_orig, yd)
|
||||
assert np.sum(retcov - cov_sorted) == 0
|
||||
|
||||
|
||||
def test_empty_obs():
|
||||
o = pe.Obs([np.random.rand(100)], ['test'])
|
||||
q = o + pe.Obs([], [], means=[])
|
||||
|
|
Loading…
Add table
Reference in a new issue