From 82419b7a88f372bf9392d77b7f1a93338da98c36 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 09:45:25 +0000 Subject: [PATCH 01/10] feat: positive semi-definite estimator for the covariance implemented, fits.covariance matrix deprecated, covariance can now handle lists of observables. --- pyerrors/fits.py | 32 +++------------------- pyerrors/obs.py | 65 ++++++++++++++++++++++++++++++-------------- tests/covobs_test.py | 6 ++-- tests/fits_test.py | 10 +++---- tests/obs_test.py | 31 ++++++--------------- 5 files changed, 65 insertions(+), 79 deletions(-) diff --git a/pyerrors/fits.py b/pyerrors/fits.py index c1364c13..e290dbfb 100644 --- a/pyerrors/fits.py +++ b/pyerrors/fits.py @@ -239,7 +239,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs): if kwargs.get('covariance') is not None: cov = kwargs.get('covariance') else: - cov = covariance_matrix(np.concatenate((y, x.ravel()))) + cov = covariance(np.concatenate((y, x.ravel()))) number_of_x_parameters = int(m / x_f.shape[-1]) @@ -455,7 +455,7 @@ def _standard_fit(x, y, func, silent=False, **kwargs): x0 = [0.1] * n_parms if kwargs.get('correlated_fit') is True: - cov = covariance_matrix(y) + cov = covariance(y) covdiag = np.diag(1. / np.sqrt(np.diag(cov))) corr = np.copy(cov) for i in range(len(y)): @@ -527,7 +527,7 @@ def _standard_fit(x, y, func, silent=False, **kwargs): if kwargs.get('expected_chisquare') is True: if kwargs.get('correlated_fit') is not True: W = np.diag(1 / np.asarray(dy_f)) - cov = covariance_matrix(y) + cov = covariance(y) A = W @ jacobian(func)(fit_result.x, x) P_phi = A @ np.linalg.inv(A.T @ A) @ A.T expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W) @@ -651,33 +651,9 @@ def residual_plot(x, y, func, fit_res): plt.draw() -def covariance_matrix(y): - """Returns the covariance matrix of y. - - Parameters - ---------- - y : list or numpy.ndarray - List or one dimensional array of Obs - """ - length = len(y) - cov = np.zeros((length, length)) - for i, item in enumerate(y): - for j, jtem in enumerate(y[:i + 1]): - if i == j: - cov[i, j] = item.dvalue ** 2 - else: - cov[i, j] = covariance(item, jtem) - cov = cov + cov.T - np.diag(np.diag(cov)) - eigenvalues = np.linalg.eigh(cov)[0] - if not np.all(eigenvalues >= 0): - warnings.warn("Covariance matrix is not positive semi-definite", RuntimeWarning) - print("Eigenvalues of the covariance matrix:", eigenvalues) - return cov - - def error_band(x, func, beta): """Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta.""" - cov = covariance_matrix(beta) + cov = covariance(beta) if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float64).eps): warnings.warn("Covariance matrix is not symmetric within floating point precision", RuntimeWarning) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index aea75752..ecb848f0 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1332,20 +1332,50 @@ def correlate(obs_a, obs_b): return o -def covariance(obs1, obs2, correlation=False, **kwargs): - """Calculates the covariance of two observables. +def covariance(obs, window=min, correlation=False, **kwargs): + """Calculates the covariance matrix of a set of observables. - covariance(obs, obs) is equal to obs.dvalue ** 2 + covariance([obs, obs])[0,1] is equal to obs.dvalue ** 2 The gamma method has to be applied first to both observables. - If abs(covariance(obs1, obs2)) > obs1.dvalue * obs2.dvalue, the covariance - is constrained to the maximum value. 
- Parameters ---------- + obs : list or numpy.ndarray + List or one dimensional array of Obs + window: function or dict + Function which selects the window for each ensemble, examples 'min', 'max', 'np.mean', 'np.median' + Alternatively a dictionary with an entry for every ensemble can be manually specified. correlation : bool if true the correlation instead of the covariance is returned (default False) """ + if isinstance(window, dict): + window_dict = window + else: + window_dict = {} + names = sorted(set([item for sublist in [o.mc_names for o in obs] for item in sublist])) + for name in names: + window_list = [] + for ob in obs: + if ob.e_windowsize.get(name) is not None: + window_list.append(ob.e_windowsize[name]) + window_dict[name] = int(window(window_list)) + + length = len(obs) + cov = np.zeros((length, length)) + for i, item in enumerate(obs): + for j, jtem in enumerate(obs[:i + 1]): + cov[i, j] = _covariance_element(item, jtem, window_dict) + cov = cov + cov.T - np.diag(np.diag(cov)) + eigenvalues = np.linalg.eigh(cov)[0] + if not np.all(eigenvalues >= 0): + warnings.warn("Covariance matrix is not positive semi-definite", RuntimeWarning) + print("Eigenvalues of the covariance matrix:", eigenvalues) + return cov + + +def _covariance_element(obs1, obs2, window_dict, correlation=False, **kwargs): + """TODO + """ def expand_deltas(deltas, idx, shape, new_idx): """Expand deltas defined on idx to a contiguous range [new_idx[0], new_idx[-1]]. @@ -1398,21 +1428,16 @@ def covariance(obs1, obs2, correlation=False, **kwargs): if e_name not in obs2.mc_names: continue + window = window_dict[e_name] + idl_d = {} - r_length = [] for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) - if isinstance(idl_d[r_name], range): - r_length.append(len(idl_d[r_name])) - else: - r_length.append((idl_d[r_name][-1] - idl_d[r_name][0] + 1)) + # TODO: Is a check needed if the length of an ensemble is zero? - if not r_length: - return 0. - - w_max = max(r_length) // 2 + w_max = window + 1 e_gamma[e_name] = np.zeros(w_max) for r_name in obs1.e_content[e_name]: @@ -1438,11 +1463,10 @@ def covariance(obs1, obs2, correlation=False, **kwargs): e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], e_rho[e_name][1:]))) # Make sure no entry of tauint is smaller than 0.5 - e_n_tauint[e_name][e_n_tauint[e_name] < 0.5] = 0.500000000001 + e_n_tauint[e_name][e_n_tauint[e_name] < 0.5] = 0.5 + np.finfo(np.float64).eps - window = min(obs1.e_windowsize[e_name], obs2.e_windowsize[e_name]) # Bias correction hep-lat/0306017 eq. (49) - e_dvalue[e_name] = 2 * (e_n_tauint[e_name][window] + obs1.tau_exp[e_name] * np.abs(e_rho[e_name][window + 1])) * (1 + (2 * window + 1) / e_N) * e_gamma[e_name][0] / e_N + e_dvalue[e_name] = 2 * (e_n_tauint[e_name][window]) * (1 + (2 * window + 1) / e_N) * e_gamma[e_name][0] / e_N dvalue += e_dvalue[e_name] @@ -1453,8 +1477,9 @@ def covariance(obs1, obs2, correlation=False, **kwargs): dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) - if np.abs(dvalue / obs1.dvalue / obs2.dvalue) > 1.0: - dvalue = np.sign(dvalue) * obs1.dvalue * obs2.dvalue + # TODO: Check if this is needed. 
+ # if np.abs(dvalue / obs1.dvalue / obs2.dvalue) > 1.0: + # dvalue = np.sign(dvalue) * obs1.dvalue * obs2.dvalue if correlation: dvalue = dvalue / obs1.dvalue / obs2.dvalue diff --git a/tests/covobs_test.py b/tests/covobs_test.py index 82cd4ae0..6bc1d49a 100644 --- a/tests/covobs_test.py +++ b/tests/covobs_test.py @@ -39,8 +39,8 @@ def test_covobs(): assert(np.isclose(oc.value, op.value, rtol=1e-14, atol=1e-14)) [o.gamma_method() for o in cl] - assert(pe.covariance(cl[0], cl[1]) == cov[0][1]) - assert(pe.covariance(cl[0], cl[1]) == cov[1][0]) + assert(pe.covariance([cl[0], cl[1]])[0, 1] == cov[0][1]) + assert(pe.covariance([cl[0], cl[1]])[0, 1] == cov[1][0]) do = cl[0] * cl[1] assert(np.array_equal(do.covobs['rAP'].grad, np.transpose([pi[1], pi[0]]).reshape(2, 1))) @@ -89,7 +89,7 @@ def test_covobs_covariance(): x = [a + b, a - b] [o.gamma_method() for o in x] - covariance = pe.fits.covariance_matrix(x) + covariance = pe.covariance(x) assert covariance[0, 0] == covariance[1, 1] assert covariance[0, 1] == a.dvalue ** 2 - b.dvalue ** 2 diff --git a/tests/fits_test.py b/tests/fits_test.py index 90a073e7..4367ac65 100644 --- a/tests/fits_test.py +++ b/tests/fits_test.py @@ -60,7 +60,7 @@ def test_least_squares(): beta[i].gamma_method(S=1.0) assert math.isclose(beta[i].value, popt[i], abs_tol=1e-5) assert math.isclose(pcov[i, i], beta[i].dvalue ** 2, abs_tol=1e-3) - assert math.isclose(pe.covariance(beta[0], beta[1]), pcov[0, 1], abs_tol=1e-3) + assert math.isclose(pe.covariance([beta[0], beta[1]])[0, 1], pcov[0, 1], abs_tol=1e-3) chi2_pyerrors = np.sum(((f(x, *[o.value for o in beta]) - y) / yerr) ** 2) / (len(x) - 2) chi2_scipy = np.sum(((f(x, *popt) - y) / yerr) ** 2) / (len(x) - 2) @@ -81,7 +81,7 @@ def test_least_squares(): betac[i].gamma_method(S=1.0) assert math.isclose(betac[i].value, popt[i], abs_tol=1e-5) assert math.isclose(pcov[i, i], betac[i].dvalue ** 2, abs_tol=1e-3) - assert math.isclose(pe.covariance(betac[0], betac[1]), pcov[0, 1], abs_tol=1e-3) + assert math.isclose(pe.covariance([betac[0], betac[1]])[0, 1], pcov[0, 1], abs_tol=1e-3) def test_alternative_solvers(): @@ -195,7 +195,7 @@ def test_total_least_squares(): beta[i].gamma_method(S=1.0) assert math.isclose(beta[i].value, output.beta[i], rel_tol=1e-5) assert math.isclose(output.cov_beta[i, i], beta[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(beta[i].dvalue ** 2) - assert math.isclose(pe.covariance(beta[0], beta[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) + assert math.isclose(pe.covariance([beta[0], beta[1]])[0, 1], output.cov_beta[0, 1], rel_tol=2.5e-1) out = pe.total_least_squares(ox, oy, func, const_par=[beta[1]]) @@ -218,7 +218,7 @@ def test_total_least_squares(): betac[i].gamma_method(S=1.0) assert math.isclose(betac[i].value, output.beta[i], rel_tol=1e-3) assert math.isclose(output.cov_beta[i, i], betac[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(betac[i].dvalue ** 2) - assert math.isclose(pe.covariance(betac[0], betac[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) + assert math.isclose(pe.covariance([betac[0], betac[1]])[0, 1], output.cov_beta[0, 1], rel_tol=2.5e-1) outc = pe.total_least_squares(oxc, oyc, func, const_par=[betac[1]]) @@ -233,7 +233,7 @@ def test_total_least_squares(): betac[i].gamma_method(S=1.0) assert math.isclose(betac[i].value, output.beta[i], rel_tol=1e-3) assert math.isclose(output.cov_beta[i, i], betac[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(betac[i].dvalue ** 2) - assert 
math.isclose(pe.covariance(betac[0], betac[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) + assert math.isclose(pe.covariance([betac[0], betac[1]])[0, 1], output.cov_beta[0, 1], rel_tol=2.5e-1) outc = pe.total_least_squares(oxc, oy, func, const_par=[betac[1]]) diff --git a/tests/obs_test.py b/tests/obs_test.py index 2fed00af..abbb091c 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -251,10 +251,10 @@ def test_covariance_is_variance(): dvalue = np.abs(np.random.normal(0, 1)) test_obs = pe.pseudo_Obs(value, dvalue, 't') test_obs.gamma_method() - assert np.abs(test_obs.dvalue ** 2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps + assert np.abs(test_obs.dvalue ** 2 - pe.covariance([test_obs, test_obs])[0, 1]) <= 10 * np.finfo(np.float64).eps test_obs = test_obs + pe.pseudo_Obs(value, dvalue, 'q', 200) test_obs.gamma_method() - assert np.abs(test_obs.dvalue ** 2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps + assert np.abs(test_obs.dvalue ** 2 - pe.covariance([test_obs, test_obs])[0, 1]) <= 10 * np.finfo(np.float64).eps def test_fft(): @@ -268,21 +268,6 @@ def test_fft(): assert np.abs(test_obs1.dvalue - test_obs2.dvalue) <= 10 * max(test_obs1.dvalue, test_obs2.dvalue) * np.finfo(np.float64).eps -def test_covariance_symmetry(): - value1 = np.random.normal(5, 10) - dvalue1 = np.abs(np.random.normal(0, 1)) - test_obs1 = pe.pseudo_Obs(value1, dvalue1, 't') - test_obs1.gamma_method() - value2 = np.random.normal(5, 10) - dvalue2 = np.abs(np.random.normal(0, 1)) - test_obs2 = pe.pseudo_Obs(value2, dvalue2, 't') - test_obs2.gamma_method() - cov_ab = pe.covariance(test_obs1, test_obs2) - cov_ba = pe.covariance(test_obs2, test_obs1) - assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps - assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) - - def test_gamma_method_uncorrelated(): # Construct pseudo Obs with random shape value = np.random.normal(5, 10) @@ -629,8 +614,8 @@ def test_covariance_symmetry(): dvalue2 = np.abs(np.random.normal(0, 1)) test_obs2 = pe.pseudo_Obs(value2, dvalue2, 't') test_obs2.gamma_method() - cov_ab = pe.covariance(test_obs1, test_obs2) - cov_ba = pe.covariance(test_obs2, test_obs1) + cov_ab = pe.covariance([test_obs1, test_obs2])[0, 1] + cov_ba = pe.covariance([test_obs2, test_obs1])[0, 1] assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) @@ -643,10 +628,10 @@ def test_covariance_symmetry(): idx = [i + 1 for i in range(len(configs)) if configs[i] == 1] a = pe.Obs([zero_arr], ['t'], idl=[idx]) a.gamma_method() - assert np.isclose(a.dvalue**2, pe.covariance(a, a), atol=100, rtol=1e-4) + assert np.isclose(a.dvalue ** 2, pe.covariance([a, a])[0, 1], atol=100, rtol=1e-4) - cov_ab = pe.covariance(test_obs1, a) - cov_ba = pe.covariance(a, test_obs1) + cov_ab = pe.covariance([test_obs1, a])[0, 1] + cov_ba = pe.covariance([a, test_obs1])[0, 1] assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps assert np.abs(cov_ab) < test_obs1.dvalue * a.dvalue * (1 + 10 * np.finfo(np.float64).eps) @@ -704,4 +689,4 @@ def test_merge_idx(): new_idx = pe.obs._merge_idx(idl) assert(new_idx[-1] > new_idx[0]) for i in range(1, len(new_idx)): - assert(new_idx[i - 1] < new_idx[i]) \ No newline at end of file + assert(new_idx[i - 1] < new_idx[i]) From 74b0f77c2d77f2f9a11a0982bd8fbd4acf0820b6 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 14:32:13 +0000 Subject: 
[PATCH 02/10] feat: covariance is now estimated from the uncorrelated correlation matrix rescaled by the full (correlated) errors. --- pyerrors/obs.py | 52 +++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 34 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index ecb848f0..8437a795 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1332,50 +1332,40 @@ def correlate(obs_a, obs_b): return o -def covariance(obs, window=min, correlation=False, **kwargs): +def covariance(obs, correlation=False, **kwargs): """Calculates the covariance matrix of a set of observables. covariance([obs, obs])[0,1] is equal to obs.dvalue ** 2 - The gamma method has to be applied first to both observables. + The gamma method has to be applied first to all observables. Parameters ---------- obs : list or numpy.ndarray List or one dimensional array of Obs - window: function or dict - Function which selects the window for each ensemble, examples 'min', 'max', 'np.mean', 'np.median' - Alternatively a dictionary with an entry for every ensemble can be manually specified. correlation : bool if true the correlation instead of the covariance is returned (default False) """ - if isinstance(window, dict): - window_dict = window - else: - window_dict = {} - names = sorted(set([item for sublist in [o.mc_names for o in obs] for item in sublist])) - for name in names: - window_list = [] - for ob in obs: - if ob.e_windowsize.get(name) is not None: - window_list.append(ob.e_windowsize[name]) - window_dict[name] = int(window(window_list)) length = len(obs) cov = np.zeros((length, length)) - for i, item in enumerate(obs): - for j, jtem in enumerate(obs[:i + 1]): - cov[i, j] = _covariance_element(item, jtem, window_dict) + for i in range(length): + for j in range(i, length): + cov[i, j] = _covariance_element(obs[i], obs[j]) cov = cov + cov.T - np.diag(np.diag(cov)) + + corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) + + errors = [o.dvalue for o in obs] + cov = np.sqrt(np.diag(errors)) @ corr @ np.sqrt(np.diag(errors)) + eigenvalues = np.linalg.eigh(cov)[0] if not np.all(eigenvalues >= 0): - warnings.warn("Covariance matrix is not positive semi-definite", RuntimeWarning) - print("Eigenvalues of the covariance matrix:", eigenvalues) + warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) return cov -def _covariance_element(obs1, obs2, window_dict, correlation=False, **kwargs): - """TODO - """ +def _covariance_element(obs1, obs2): + """Estimates the covariance of two Obs objects based on fixed window sizes passed to the function.""" def expand_deltas(deltas, idx, shape, new_idx): """Expand deltas defined on idx to a contiguous range [new_idx[0], new_idx[-1]]. 
@@ -1407,7 +1397,9 @@ def _covariance_element(obs1, obs2, window_dict, correlation=False, **kwargs): max_gamma = min(new_shape, w_max) # The padding for the fft has to be even padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 - gamma[:max_gamma] += (np.fft.irfft(np.fft.rfft(deltas1, padding) * np.conjugate(np.fft.rfft(deltas2, padding)))[:max_gamma] + np.fft.irfft(np.fft.rfft(deltas2, padding) * np.conjugate(np.fft.rfft(deltas1, padding)))[:max_gamma]) / 2.0 + rfft1 = np.fft.rfft(deltas1, padding) + rfft2 = np.fft.rfft(deltas2, padding) + gamma[:max_gamma] += (np.fft.irfft(rfft1 * np.conjugate(rfft2))[:max_gamma] + np.fft.irfft(rfft2 * np.conjugate(rfft1))[:max_gamma]) / 2.0 return gamma @@ -1428,14 +1420,13 @@ def _covariance_element(obs1, obs2, window_dict, correlation=False, **kwargs): if e_name not in obs2.mc_names: continue - window = window_dict[e_name] + window = 0 idl_d = {} for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) - # TODO: Is a check needed if the length of an ensemble is zero? w_max = window + 1 e_gamma[e_name] = np.zeros(w_max) @@ -1477,13 +1468,6 @@ def _covariance_element(obs1, obs2, window_dict, correlation=False, **kwargs): dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) - # TODO: Check if this is needed. - # if np.abs(dvalue / obs1.dvalue / obs2.dvalue) > 1.0: - # dvalue = np.sign(dvalue) * obs1.dvalue * obs2.dvalue - - if correlation: - dvalue = dvalue / obs1.dvalue / obs2.dvalue - return dvalue From c28d6131b1a892ace1c9586f846fa0ddbdace9b4 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 14:35:38 +0000 Subject: [PATCH 03/10] refactor: redundant lines in _covariance_element removed. --- pyerrors/obs.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 8437a795..19b08860 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1365,7 +1365,7 @@ def covariance(obs, correlation=False, **kwargs): def _covariance_element(obs1, obs2): - """Estimates the covariance of two Obs objects based on fixed window sizes passed to the function.""" + """Estimates the uncorrelated covariance of two Obs objects.""" def expand_deltas(deltas, idx, shape, new_idx): """Expand deltas defined on idx to a contiguous range [new_idx[0], new_idx[-1]]. @@ -1410,25 +1410,20 @@ def _covariance_element(obs1, obs2): raise Exception('The gamma method has to be applied to both Obs first.') dvalue = 0 + w_max = 1 e_gamma = {} - e_dvalue = {} - e_n_tauint = {} - e_rho = {} for e_name in obs1.mc_names: if e_name not in obs2.mc_names: continue - window = 0 - idl_d = {} for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) - w_max = window + 1 e_gamma[e_name] = np.zeros(w_max) for r_name in obs1.e_content[e_name]: @@ -1451,15 +1446,8 @@ def _covariance_element(obs1, obs2): e_N += np.sum(np.ones_like(idl_d[r_name])) e_gamma[e_name] /= gamma_div[:w_max] - e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] - e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], e_rho[e_name][1:]))) - # Make sure no entry of tauint is smaller than 0.5 - e_n_tauint[e_name][e_n_tauint[e_name] < 0.5] = 0.5 + np.finfo(np.float64).eps - # Bias correction hep-lat/0306017 eq. 
(49) - e_dvalue[e_name] = 2 * (e_n_tauint[e_name][window]) * (1 + (2 * window + 1) / e_N) * e_gamma[e_name][0] / e_N - - dvalue += e_dvalue[e_name] + dvalue += (1 + 1 / e_N) * e_gamma[e_name][0] / e_N for e_name in obs1.cov_names: From 3796c0395f83a146bed3c71b34afbc6f7f259de3 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 15:34:53 +0000 Subject: [PATCH 04/10] feat: computation of _covariance_element optimized, visualize option added to covariance, tests adjusted. --- pyerrors/obs.py | 51 +++++++++++++++++++------------------------- tests/covobs_test.py | 8 +++---- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 19b08860..3cb22f36 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1332,7 +1332,7 @@ def correlate(obs_a, obs_b): return o -def covariance(obs, correlation=False, **kwargs): +def covariance(obs, visualize=False, **kwargs): """Calculates the covariance matrix of a set of observables. covariance([obs, obs])[0,1] is equal to obs.dvalue ** 2 @@ -1342,8 +1342,8 @@ def covariance(obs, correlation=False, **kwargs): ---------- obs : list or numpy.ndarray List or one dimensional array of Obs - correlation : bool - if true the correlation instead of the covariance is returned (default False) + visualize : bool + Plots the corresponding normalized correlation matrix. """ length = len(obs) @@ -1356,11 +1356,18 @@ def covariance(obs, correlation=False, **kwargs): corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) errors = [o.dvalue for o in obs] - cov = np.sqrt(np.diag(errors)) @ corr @ np.sqrt(np.diag(errors)) + cov = np.diag(errors) @ corr @ np.diag(errors) eigenvalues = np.linalg.eigh(cov)[0] if not np.all(eigenvalues >= 0): warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) + + if visualize: + plt.matshow(corr, vmin=-1, vmax=1) + plt.set_cmap('RdBu') + plt.colorbar() + plt.draw() + return cov @@ -1389,29 +1396,18 @@ def _covariance_element(obs1, obs2): ret[idx[i] - new_idx[0]] = deltas[i] return ret - def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx, w_max): - gamma = np.zeros(w_max) + def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): deltas1 = expand_deltas(deltas1, idx1, len(idx1), new_idx) deltas2 = expand_deltas(deltas2, idx2, len(idx2), new_idx) - new_shape = len(deltas1) - max_gamma = min(new_shape, w_max) - # The padding for the fft has to be even - padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 - rfft1 = np.fft.rfft(deltas1, padding) - rfft2 = np.fft.rfft(deltas2, padding) - gamma[:max_gamma] += (np.fft.irfft(rfft1 * np.conjugate(rfft2))[:max_gamma] + np.fft.irfft(rfft2 * np.conjugate(rfft1))[:max_gamma]) / 2.0 - - return gamma + return np.sum(deltas1 * deltas2) if set(obs1.names).isdisjoint(set(obs2.names)): - return 0. 
+ return 0.0 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): raise Exception('The gamma method has to be applied to both Obs first.') - dvalue = 0 - w_max = 1 - e_gamma = {} + dvalue = 0.0 for e_name in obs1.mc_names: @@ -1424,30 +1420,27 @@ def _covariance_element(obs1, obs2): continue idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) - e_gamma[e_name] = np.zeros(w_max) + gamma = 0.0 for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue - e_gamma[e_name] += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name], w_max) + gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) - if np.all(e_gamma[e_name] == 0.0): + if gamma == 0.0: continue - e_shapes = [] - for r_name in obs1.e_content[e_name]: - e_shapes.append(obs1.shape[r_name]) - gamma_div = np.zeros(w_max) + gamma_div = 0.0 e_N = 0 for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue - gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name], w_max) + gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) e_N += np.sum(np.ones_like(idl_d[r_name])) - e_gamma[e_name] /= gamma_div[:w_max] + gamma /= gamma_div # Bias correction hep-lat/0306017 eq. (49) - dvalue += (1 + 1 / e_N) * e_gamma[e_name][0] / e_N + dvalue += (1 + 1 / e_N) * gamma / e_N for e_name in obs1.cov_names: diff --git a/tests/covobs_test.py b/tests/covobs_test.py index 6bc1d49a..f0a53e89 100644 --- a/tests/covobs_test.py +++ b/tests/covobs_test.py @@ -39,8 +39,8 @@ def test_covobs(): assert(np.isclose(oc.value, op.value, rtol=1e-14, atol=1e-14)) [o.gamma_method() for o in cl] - assert(pe.covariance([cl[0], cl[1]])[0, 1] == cov[0][1]) - assert(pe.covariance([cl[0], cl[1]])[0, 1] == cov[1][0]) + assert(np.isclose(pe.covariance([cl[0], cl[1]])[0, 1], cov[0][1])) + assert(np.isclose(pe.covariance([cl[0], cl[1]])[0, 1], cov[1][0])) do = cl[0] * cl[1] assert(np.array_equal(do.covobs['rAP'].grad, np.transpose([pi[1], pi[0]]).reshape(2, 1))) @@ -91,8 +91,8 @@ def test_covobs_covariance(): covariance = pe.covariance(x) - assert covariance[0, 0] == covariance[1, 1] - assert covariance[0, 1] == a.dvalue ** 2 - b.dvalue ** 2 + assert np.isclose(covariance[0, 0], covariance[1, 1]) + assert np.isclose(covariance[0, 1], a.dvalue ** 2 - b.dvalue ** 2) def test_covobs_exceptions(): From 8d93ff95f202bf8a67374e222336dd7f3b32a0f7 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 17:27:04 +0000 Subject: [PATCH 05/10] tests: further tests for covariance added. 
--- tests/obs_test.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index abbb091c..c86da060 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -636,6 +636,43 @@ def test_covariance_symmetry(): assert np.abs(cov_ab) < test_obs1.dvalue * a.dvalue * (1 + 10 * np.finfo(np.float64).eps) +def test_covariance_sum(): + length = 2 + t_fac = 0.4 + tt = pe.misc.gen_correlated_data(np.zeros(length), 0.99 * np.ones((length, length)) + 0.01 * np.diag(np.ones(length)), 'test', tau=0.5 + t_fac * np.random.rand(length), samples=1000) + [o.gamma_method(S=0) for o in tt] + + t_cov = pe.covariance(tt) + + my_sum = tt[0] + tt[1] + my_sum.gamma_method(S=0) + e_cov = (my_sum.dvalue ** 2 - tt[0].dvalue ** 2 - tt[1].dvalue ** 2) / 2 + + assert np.isclose(e_cov, t_cov[0, 1]) + + +def test_covariance_positive_semidefinite(): + length = 64 + t_fac = 1.5 + tt = pe.misc.gen_correlated_data(np.zeros(length), 0.99999 * np.ones((length, length)) + 0.00001 * np.diag(np.ones(length)), 'test', tau=0.5 + t_fac * np.random.rand(length), samples=1000) + [o.gamma_method() for o in tt] + cov = pe.covariance(tt) + assert np.all(np.linalg.eigh(cov)[0] >= -1e-15) + + +def test_covariance_factorizing(): + length = 2 + t_fac = 1.5 + + tt = pe.misc.gen_correlated_data(np.zeros(length), 0.75 * np.ones((length, length)) + 0.8 * np.diag(np.ones(length)), 'test', tau=0.5 + t_fac * np.random.rand(length), samples=1000) + [o.gamma_method() for o in tt] + + mt0 = -tt[0] + mt0.gamma_method() + + assert np.isclose(pe.covariance([mt0, tt[1]])[0, 1], -pe.covariance(tt)[0, 1]) + + def test_empty_obs(): o = pe.Obs([np.random.rand(100)], ['test']) q = o + pe.Obs([], []) From da0c43fe9ae1d43dc361329ceffdceab9db9d3e4 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 17:46:12 +0000 Subject: [PATCH 06/10] feat: correlation parameter added to covariance again, tests extended. --- pyerrors/obs.py | 11 ++++++++--- tests/obs_test.py | 30 ++++++++++++++++++------------ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 3cb22f36..461e76a8 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1332,7 +1332,7 @@ def correlate(obs_a, obs_b): return o -def covariance(obs, visualize=False, **kwargs): +def covariance(obs, visualize=False, correlation=False, **kwargs): """Calculates the covariance matrix of a set of observables. covariance([obs, obs])[0,1] is equal to obs.dvalue ** 2 @@ -1343,7 +1343,9 @@ def covariance(obs, visualize=False, **kwargs): obs : list or numpy.ndarray List or one dimensional array of Obs visualize : bool - Plots the corresponding normalized correlation matrix. + If True plots the corresponding normalized correlation matrix (default False). + correlation : bool + If True the correlation instead of the covariance is returned (default False). 
""" length = len(obs) @@ -1368,7 +1370,10 @@ def covariance(obs, visualize=False, **kwargs): plt.colorbar() plt.draw() - return cov + if correlation is True: + return corr + else: + return cov def _covariance_element(obs1, obs2): diff --git a/tests/obs_test.py b/tests/obs_test.py index c86da060..2a80cac1 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -246,17 +246,6 @@ def test_gamma_method_kwargs(): assert my_obs.N_sigma['ens'] == pe.Obs.N_sigma_global -def test_covariance_is_variance(): - value = np.random.normal(5, 10) - dvalue = np.abs(np.random.normal(0, 1)) - test_obs = pe.pseudo_Obs(value, dvalue, 't') - test_obs.gamma_method() - assert np.abs(test_obs.dvalue ** 2 - pe.covariance([test_obs, test_obs])[0, 1]) <= 10 * np.finfo(np.float64).eps - test_obs = test_obs + pe.pseudo_Obs(value, dvalue, 'q', 200) - test_obs.gamma_method() - assert np.abs(test_obs.dvalue ** 2 - pe.covariance([test_obs, test_obs])[0, 1]) <= 10 * np.finfo(np.float64).eps - - def test_fft(): value = np.random.normal(5, 100) dvalue = np.abs(np.random.normal(0, 5)) @@ -605,6 +594,18 @@ def test_gamma_method_irregular(): ol = [a, b] o = (ol[0] - ol[1]) / (ol[1]) + +def test_covariance_is_variance(): + value = np.random.normal(5, 10) + dvalue = np.abs(np.random.normal(0, 1)) + test_obs = pe.pseudo_Obs(value, dvalue, 't') + test_obs.gamma_method() + assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1]) + test_obs = test_obs + pe.pseudo_Obs(value, dvalue, 'q', 200) + test_obs.gamma_method() + assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1]) + + def test_covariance_symmetry(): value1 = np.random.normal(5, 10) dvalue1 = np.abs(np.random.normal(0, 1)) @@ -616,7 +617,7 @@ def test_covariance_symmetry(): test_obs2.gamma_method() cov_ab = pe.covariance([test_obs1, test_obs2])[0, 1] cov_ba = pe.covariance([test_obs2, test_obs1])[0, 1] - assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps + assert np.isclose(cov_ab, cov_ba) assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) N = 100 @@ -673,6 +674,11 @@ def test_covariance_factorizing(): assert np.isclose(pe.covariance([mt0, tt[1]])[0, 1], -pe.covariance(tt)[0, 1]) +def test_covariance_correlation(): + test_obs = pe.pseudo_Obs(-4, 0.8, 'test', samples=784) + assert np.allclose(pe.covariance([test_obs, test_obs, test_obs], correlation=True), np.ones((3, 3))) + + def test_empty_obs(): o = pe.Obs([np.random.rand(100)], ['test']) q = o + pe.Obs([], []) From 1f0f0604721d7caa38b65fa68ee92640eb0a45b0 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 19:07:08 +0000 Subject: [PATCH 07/10] docs: docstring for covariance extended. --- pyerrors/obs.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 461e76a8..4b6a431a 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1335,7 +1335,6 @@ def correlate(obs_a, obs_b): def covariance(obs, visualize=False, correlation=False, **kwargs): """Calculates the covariance matrix of a set of observables. - covariance([obs, obs])[0,1] is equal to obs.dvalue ** 2 The gamma method has to be applied first to all observables. Parameters @@ -1346,6 +1345,14 @@ def covariance(obs, visualize=False, correlation=False, **kwargs): If True plots the corresponding normalized correlation matrix (default False). correlation : bool If True the correlation instead of the covariance is returned (default False). 
+ + Notes + ----- + The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. This is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. + $$ + \tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}} + $$ + This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). """ length = len(obs) From 42d9d3630e670ff4bd3e5a0688c8de18fe5e78ad Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 1 Mar 2022 19:16:54 +0000 Subject: [PATCH 08/10] ci: W605 added to flake8 exceptions. --- .github/workflows/flake8.yml | 2 +- CONTRIBUTING.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index efb81565..3b067cd2 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -21,6 +21,6 @@ jobs: - name: flake8 Lint uses: py-actions/flake8@v1 with: - ignore: "E501" + ignore: "E501,W605" exclude: "__init__.py, input/__init__.py" path: "pyerrors" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd627bd2..8ef8055e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -24,12 +24,12 @@ For all pull requests tests are executed for the most recent python releases via ``` pytest --cov=pyerrors -vv ``` -requiring `pytest`, `pytest-cov` and `pytest-benchmark`. To get a coverage report in html run +requiring `pytest`, `pytest-cov` and `pytest-benchmark`. To get a coverage report in html run ``` pytest --cov=pyerrors --cov-report html ``` The linter `flake8` is executed with the command ``` -flake8 --ignore=E501 --exclude=__init__.py pyerrors +flake8 --ignore=E501,W605 --exclude=__init__.py pyerrors ``` Please make sure that all tests are passed for a new pull requests. From 11007dffc9d7f0e2081fde9a6a1f6c1e045629c9 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 2 Mar 2022 11:11:48 +0000 Subject: [PATCH 09/10] docs: docstrings of covariance and _covariance_element clarified --- pyerrors/obs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 4b6a431a..69af5cea 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1348,7 +1348,7 @@ def covariance(obs, visualize=False, correlation=False, **kwargs): Notes ----- - The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. This is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. + The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. 
For observables defined on a single ensemble this is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$ \tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}} $$ @@ -1384,7 +1384,7 @@ def covariance(obs, visualize=False, correlation=False, **kwargs): def _covariance_element(obs1, obs2): - """Estimates the uncorrelated covariance of two Obs objects.""" + """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" def expand_deltas(deltas, idx, shape, new_idx): """Expand deltas defined on idx to a contiguous range [new_idx[0], new_idx[-1]]. From ece3c5d2ba62f3ed84e20f581db95cc3823a18e2 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 2 Mar 2022 11:49:29 +0000 Subject: [PATCH 10/10] tests: additional test added. --- tests/obs_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index 2a80cac1..3d773951 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -674,6 +674,23 @@ def test_covariance_factorizing(): assert np.isclose(pe.covariance([mt0, tt[1]])[0, 1], -pe.covariance(tt)[0, 1]) +def test_covariance_alternation(): + length = 12 + t_fac = 2.5 + + tt1 = pe.misc.gen_correlated_data(np.zeros(length), -0.00001 * np.ones((length, length)) + 0.002 * np.diag(np.ones(length)), 'test', tau=0.5 + t_fac * np.random.rand(length), samples=88) + tt2 = pe.misc.gen_correlated_data(np.zeros(length), 0.9999 * np.ones((length, length)) + 0.0001 * np.diag(np.ones(length)), 'another_test|r0', tau=0.7 + t_fac * np.random.rand(length), samples=73) + tt3 = pe.misc.gen_correlated_data(np.zeros(length), 0.9999 * np.ones((length, length)) + 0.0001 * np.diag(np.ones(length)), 'another_test|r1', tau=0.7 + t_fac * np.random.rand(length), samples=91) + + tt = np.array(tt1) + (np.array(tt2) + np.array(tt3)) + tt *= np.resize([1, -1], length) + + [o.gamma_method() for o in tt] + cov = pe.covariance(tt, True) + + assert np.all(np.linalg.eigh(cov)[0] > -1e-15) + + def test_covariance_correlation(): test_obs = pe.pseudo_Obs(-4, 0.8, 'test', samples=784) assert np.allclose(pe.covariance([test_obs, test_obs, test_obs], correlation=True), np.ones((3, 3)))
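Usage sketch for the reworked pe.covariance API (illustrative only): the observable values, ensemble names and sample counts below are made up; only the call signature (list input, the correlation keyword, the requirement to run the gamma method first) and the positive semi-definiteness property follow from the patches above.

    import numpy as np
    import pyerrors as pe

    # Two pseudo-observables on independent ensembles and one derived quantity
    # that is correlated with both of them (illustrative numbers only).
    a = pe.pseudo_Obs(1.2, 0.05, 'ens1', samples=500)
    b = pe.pseudo_Obs(0.8, 0.03, 'ens2', samples=500)
    c = a + 0.5 * b

    for o in (a, b, c):
        o.gamma_method()  # the error analysis has to be applied before estimating the covariance

    cov = pe.covariance([a, b, c])                     # 3x3 covariance matrix
    corr = pe.covariance([a, b, c], correlation=True)  # normalized correlation matrix
    # pe.covariance([a, b, c], visualize=True) would additionally plot the correlation matrix

    # The diagonal reproduces the squared errors and the estimator is
    # positive semi-definite up to numerical rounding errors.
    assert np.isclose(cov[0, 0], a.dvalue ** 2)
    assert np.all(np.linalg.eigh(cov)[0] > -1e-15)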