diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index 3114bcd6..220b1646 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -1,5 +1,7 @@
 import warnings
 import pickle
+from math import gcd
+from functools import reduce
 import numpy as np
 import autograd.numpy as anp  # Thinly-wrapped numpy
 from autograd import jacobian
@@ -981,6 +983,39 @@ def _merge_idx(idl):
     return sorted(set().union(*idl))


+def _intersection_idx(idl):
+    """Returns the intersection of all lists in idl as a sorted list
+
+    Parameters
+    ----------
+    idl : list
+        List of lists or ranges.
+    """
+
+    def _lcm(*args):
+        """Returns the lowest common multiple of args.
+
+        From Python 3.9 onwards the math library contains an lcm function."""
+        return reduce(lambda a, b: a * b // gcd(a, b), args)
+
+    # Use groupby to efficiently check whether all elements of idl are identical
+    try:
+        g = groupby(idl)
+        if next(g, True) and not next(g, False):
+            return idl[0]
+    except Exception:
+        pass
+
+    if np.all([type(idx) is range for idx in idl]):
+        if len(set([idx[0] for idx in idl])) == 1:
+            idstart = max([idx.start for idx in idl])
+            idstop = min([idx.stop for idx in idl])
+            idstep = _lcm(*[idx.step for idx in idl])
+            return range(idstart, idstop, idstep)
+
+    return sorted(set.intersection(*[set(o) for o in idl]))
+
+
 def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
     """Expand deltas defined on idx to the list of configs that is defined by new_idx.
        New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
@@ -1008,6 +1043,34 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
     return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])


+def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
+    """Collapse deltas defined on idx to the list of configs that is defined by new_idx.
+       Only the entries of idx that are also contained in new_idx are kept;
+       configs in new_idx that are missing from idx contribute zero.
+
+    Parameters
+    ----------
+    deltas : list
+        List of fluctuations
+    idx : list
+        List or range of configs on which the deltas are defined.
+        Has to be sorted in ascending order.
+    shape : list
+        Number of configs in idx.
+    new_idx : list
+        List of configs that defines the new range, has to be a subset of idx and sorted in ascending order.
+    """
+
+    if type(idx) is range and type(new_idx) is range:
+        if idx == new_idx:
+            return deltas
+    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
+    for i in range(shape):
+        if idx[i] in new_idx:
+            ret[idx[i] - new_idx[0]] = deltas[i]
+    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
+
+
 def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
     """Filter out all configurations with vanishing fluctuation such that they do not
        contribute to the error estimate anymore. Returns the new deltas and
@@ -1429,8 +1492,8 @@ def _covariance_element(obs1, obs2):
     """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

     def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
-        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
-        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
+        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
+        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
         return np.sum(deltas1 * deltas2)

     if set(obs1.names).isdisjoint(set(obs2.names)):
@@ -1450,29 +1513,30 @@ def _covariance_element(obs1, obs2):
         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
-            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])
+            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

         gamma = 0.0

         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
+            if len(idl_d[r_name]) == 0:
+                continue
             gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

         if gamma == 0.0:
             continue

         gamma_div = 0.0
-        e_N = 0
         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
-            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
-            e_N += len(idl_d[r_name])
-        gamma /= max(gamma_div, 1.0)
+            if len(idl_d[r_name]) == 0:
+                continue
+            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
+        gamma /= gamma_div

-        # Bias correction hep-lat/0306017 eq. (49)
-        dvalue += (1 + 1 / e_N) * gamma / e_N
+        dvalue += gamma

     for e_name in obs1.cov_names:

diff --git a/tests/obs_test.py b/tests/obs_test.py
index 381ecdcb..55a23abd 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -515,6 +515,35 @@ def test_merge_idx():
     assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50)


+def test_intersection_idx():
+    assert pe.obs._intersection_idx([range(1, 100), range(1, 100), range(1, 100)]) == range(1, 100)
+    assert pe.obs._intersection_idx([range(1, 100, 10), range(1, 100, 2)]) == range(1, 100, 10)
+    assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50)
+    assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250)
+
+    for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]:
+        assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids])
+
+def test_merge_intersection():
+    for idl_list in [[range(1, 100), range(1, 100), range(1, 100)],
+                     [range(4, 80, 6), range(4, 80, 6)],
+                     [[0, 2, 8, 19, 205], [0, 2, 8, 19, 205]]]:
+        assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list)
+
+
+def test_intersection_collapse():
+    range1 = range(1, 2000, 2)
+    range2 = range(2, 2001, 8)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+
+    intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]])
+    coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1)
+
+    assert np.all(coll == obs1.deltas["ens"])
+
+
 def test_irregular_error_propagation():
     obs_list = [pe.Obs([np.random.rand(100)], ['t']),
                 pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]),
@@ -619,6 +648,26 @@ def test_covariance_is_variance():
     assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1])


+def test_covariance_vs_numpy():
+    N = 1078
+    data1 = np.random.normal(2.5, 0.2, N)
+    data2 = np.random.normal(0.5, 0.08, N)
+    data3 = np.random.normal(-178, 5, N)
+    uncorr = np.row_stack([data1, data2, data3])
+    corr = np.random.multivariate_normal([0.0, 17, -0.0487], [[1.0, 0.6, -0.22], [0.6, 0.8, 0.01], [-0.22, 0.01, 1.9]], N).T
+
+    for X in [uncorr, corr]:
+        obs1 = pe.Obs([X[0]], ["ens1"])
+        obs2 = pe.Obs([X[1]], ["ens1"])
+        obs3 = pe.Obs([X[2]], ["ens1"])
+        obs1.gamma_method(S=0.0)
+        obs2.gamma_method(S=0.0)
+        obs3.gamma_method(S=0.0)
+        pe_cov = pe.covariance([obs1, obs2, obs3])
+        np_cov = np.cov(X) / N
+        assert np.allclose(pe_cov, np_cov, atol=1e-14)
+
+
 def test_covariance_symmetry():
     value1 = np.random.normal(5, 10)
     dvalue1 = np.abs(np.random.normal(0, 1))
@@ -729,6 +778,86 @@ def test_covariance_idl():
     pe.covariance([obs1, obs2])


+def test_correlation_intersection_of_idls():
+    range1 = range(1, 2000, 2)
+    range2 = range(2, 2001, 2)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_a = 0.4 * pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + 0.6 * obs1
+    obs1.gamma_method()
+    obs2_a.gamma_method()
+
+    cov1 = pe.covariance([obs1, obs2_a])
+    corr1 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs2_b.gamma_method()
+
+    cov2 = pe.covariance([obs1, obs2_b])
+    corr2 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+    assert cov1[0, 1] > cov2[0, 1]
+
+    obs2_c = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs2_c.gamma_method()
+    assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14)
+
+
+def test_covariance_non_identical_objects():
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"])
+    obs1.gamma_method()
+    obs2 = obs1 + 1e-18
+    obs2.gamma_method()
+    assert obs1 == obs2
+    assert obs1 is not obs2
+    assert np.allclose(np.ones((2, 2)), pe.covariance([obs1, obs2], correlation=True), atol=1e-14)
+
+
+def test_covariance_additional_non_overlapping_data():
+    range1 = range(1, 20, 2)
+
+    data2 = np.random.normal(0.0, 0.1, len(range1))
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_a = pe.Obs([data2], ["ens"], idl=[range1])
+    obs1.gamma_method()
+    obs2_a.gamma_method()
+
+    corr1 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    added_data = np.random.normal(0.0, 0.1, len(range1))
+    added_data -= np.mean(added_data) - np.mean(data2)
+    data2_extended = np.ravel([data2, added_data], 'F')
+
+    obs2_b = pe.Obs([data2_extended], ["ens"])
+    obs2_b.gamma_method()
+
+    corr2 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+
+
+def test_covariance_reorder_non_overlapping_data():
+    range1 = range(1, 20, 2)
+    range2 = range(1, 41, 2)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_b = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs1.gamma_method()
+    obs2_b.gamma_method()
+
+    corr1 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    deltas = list(obs2_b.deltas['ens'][:len(range1)]) + sorted(obs2_b.deltas['ens'][len(range1):])
+    obs2_a = pe.Obs([obs2_b.value + np.array(deltas)], ["ens"], idl=[range2])
+    obs2_a.gamma_method()
+
+    corr2 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+
+
 def test_empty_obs():
     o = pe.Obs([np.random.rand(100)], ['test'])
     q = o + pe.Obs([], [], means=[])
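Illustration (not part of the patch): a minimal sketch of the behaviour pinned down by test_correlation_intersection_of_idls, assuming a pyerrors installation that includes this change and using only the public API (pe.Obs, Obs.gamma_method, pe.covariance). With the intersection-based estimator, two observables on the same ensemble that share no configurations have a numerically vanishing covariance:

    import numpy as np
    import pyerrors as pe

    range1 = range(1, 2000, 2)   # odd configuration numbers
    range2 = range(2, 2001, 2)   # even configuration numbers, disjoint from range1

    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
    obs2 = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
    obs1.gamma_method()
    obs2.gamma_method()

    # The covariance is estimated on the shared configurations only; with no
    # configurations in common the off-diagonal element is numerically zero.
    assert np.isclose(0, pe.covariance([obs1, obs2])[0, 1], atol=1e-14)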