From 934d0912498f9951d34b803b8cab223b91f0d1cd Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 8 Apr 2022 11:14:58 +0100 Subject: [PATCH 1/8] feat: _intersection_idx and _collapse_deltas_for_merge together with tests added. --- pyerrors/obs.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++ tests/obs_test.py | 20 +++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index b4eb6469..f04b96c9 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -972,6 +972,33 @@ def _merge_idx(idl): return sorted(set().union(*idl)) +def _intersection_idx(idl): + """Returns the intersection of all lists in idl as sorted list + + Parameters + ---------- + idl : list + List of lists or ranges. + """ + + # Use groupby to efficiently check whether all elements of idl are identical + try: + g = groupby(idl) + if next(g, True) and not next(g, False): + return idl[0] + except Exception: + pass + + if np.all([type(idx) is range for idx in idl]): + if len(set([idx[0] for idx in idl])) == 1: + idstart = max([idx.start for idx in idl]) + idstop = min([idx.stop for idx in idl]) + idstep = max([idx.step for idx in idl]) + return range(idstart, idstop, idstep) + + return sorted(set().intersection(*idl)) + + def _expand_deltas_for_merge(deltas, idx, shape, new_idx): """Expand deltas defined on idx to the list of configs that is defined by new_idx. New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest @@ -999,6 +1026,34 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx): return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) +def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): + """Collapse deltas defined on idx to the list of configs that is defined by new_idx. + If idx and new_idx are of type range, the smallest + common divisor of the step sizes is used as new step size. + + Parameters + ---------- + deltas : list + List of fluctuations + idx : list + List or range of configs on which the deltas are defined. + Has to be a subset of new_idx and has to be sorted in ascending order. + shape : list + Number of configs in idx. + new_idx : list + List of configs that defines the new range, has to be sorted in ascending order. + """ + + if type(idx) is range and type(new_idx) is range: + if idx == new_idx: + return deltas + ret = np.zeros(new_idx[-1] - new_idx[0] + 1) + for i in range(shape): + if idx[i] in new_idx: + ret[idx[i] - new_idx[0]] = deltas[i] + return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) + + def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): """Filter out all configurations with vanishing fluctuation such that they do not contribute to the error estimate anymore. Returns the new deltas and diff --git a/tests/obs_test.py b/tests/obs_test.py index 381ecdcb..8ed497d6 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -515,6 +515,26 @@ def test_merge_idx(): assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50) +def test_intersection_idx(): + assert pe.obs._intersection_idx([range(1, 100), range(1, 100), range(1, 100)]) == range(1, 100) + assert pe.obs._intersection_idx([range(1, 100, 10), range(1, 100, 2)]) == range(1, 100, 10) + assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50) + assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250) + + +def test_intersection_collapse(): + range1 = range(1, 2000, 2) + range2 = range(2, 2001, 8) + + obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2]) + + intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]]) + coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1) + + assert np.all(coll == obs1.deltas["ens"]) + + def test_irregular_error_propagation(): obs_list = [pe.Obs([np.random.rand(100)], ['t']), pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]), From eacc9b19a3b1f1f82e0f077698ce12031d27ba9a Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 8 Apr 2022 11:44:01 +0100 Subject: [PATCH 2/8] feat: the correlation for two observables with different idls is now based on the intersection of two instead of the union. Tests added. --- pyerrors/obs.py | 19 ++++++++++--------- tests/obs_test.py | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index f04b96c9..7e1ed9e0 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1472,8 +1472,8 @@ def _covariance_element(obs1, obs2): """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): - deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) - deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) + deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) + deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) return np.sum(deltas1 * deltas2) if set(obs1.names).isdisjoint(set(obs2.names)): @@ -1493,29 +1493,30 @@ def _covariance_element(obs1, obs2): for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue - idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) + idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) gamma = 0.0 for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue + if len(idl_d[r_name]) == 0: + continue gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) if gamma == 0.0: continue gamma_div = 0.0 - e_N = 0 for r_name in obs1.e_content[e_name]: if r_name not in obs2.e_content[e_name]: continue - gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) - e_N += len(idl_d[r_name]) - gamma /= max(gamma_div, 1.0) + if len(idl_d[r_name]) == 0: + continue + gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) + gamma /= gamma_div - # Bias correction hep-lat/0306017 eq. (49) - dvalue += (1 + 1 / e_N) * gamma / e_N + dvalue += gamma for e_name in obs1.cov_names: diff --git a/tests/obs_test.py b/tests/obs_test.py index 8ed497d6..c2d161e4 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -749,6 +749,32 @@ def test_covariance_idl(): pe.covariance([obs1, obs2]) +def test_correlation_intersection_of_idls(): + range1 = range(1, 2000, 2) + range2 = range(2, 2001, 2) + + obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + obs2_a = 0.4 * pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + 0.6 * obs1 + obs1.gamma_method() + obs2_a.gamma_method() + + cov1 = pe.covariance([obs1, obs2_a]) + corr1 = pe.covariance([obs1, obs2_a], correlation=True) + + obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2]) + obs2_b.gamma_method() + + cov2 = pe.covariance([obs1, obs2_b]) + corr2 = pe.covariance([obs1, obs2_b], correlation=True) + + assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14) + assert cov1[0, 1] > cov2[0, 1] + + obs2_c = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2]) + obs2_c.gamma_method() + assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14) + + def test_empty_obs(): o = pe.Obs([np.random.rand(100)], ['test']) q = o + pe.Obs([], [], means=[]) From 99c98aeb9e3752529232c9db4bc873d33a684293 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 28 Apr 2022 11:25:17 +0100 Subject: [PATCH 3/8] fix: bug in obs._intersection_idx fixed. --- pyerrors/obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 7e1ed9e0..3437019a 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -996,7 +996,7 @@ def _intersection_idx(idl): idstep = max([idx.step for idx in idl]) return range(idstart, idstop, idstep) - return sorted(set().intersection(*idl)) + return sorted(set.intersection(*[set(o) for o in tt])) def _expand_deltas_for_merge(deltas, idx, shape, new_idx): From c7f17396e57418ec44626c6030bd829d43c75ea8 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 28 Apr 2022 14:07:42 +0100 Subject: [PATCH 4/8] fix: Another bug in range detection of obs._intersection_idx fixed. --- pyerrors/obs.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 3437019a..28c5c6be 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1,5 +1,7 @@ import warnings import pickle +from math import gcd +from functools import reduce import numpy as np import autograd.numpy as anp # Thinly-wrapped numpy from autograd import jacobian @@ -981,6 +983,12 @@ def _intersection_idx(idl): List of lists or ranges. """ + def _lcm(*args): + """Returns the lowest common multiple of args. + + From python 3.9 onwards the math library contains an lcm function.""" + return reduce(lambda a, b: a * b // gcd(a, b), args) + # Use groupby to efficiently check whether all elements of idl are identical try: g = groupby(idl) @@ -993,10 +1001,10 @@ def _intersection_idx(idl): if len(set([idx[0] for idx in idl])) == 1: idstart = max([idx.start for idx in idl]) idstop = min([idx.stop for idx in idl]) - idstep = max([idx.step for idx in idl]) + idstep = _lcm(*[idx.step for idx in idl]) return range(idstart, idstop, idstep) - return sorted(set.intersection(*[set(o) for o in tt])) + return sorted(set.intersection(*[set(o) for o in idl])) def _expand_deltas_for_merge(deltas, idx, shape, new_idx): From 217d310ca473bcc7ef050987b4afb78926be83cf Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 28 Apr 2022 14:13:29 +0100 Subject: [PATCH 5/8] tests: test for _intersection_idx extended. --- tests/obs_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index c2d161e4..8f32bd77 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -521,6 +521,9 @@ def test_intersection_idx(): assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50) assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250) + for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]: + assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids]) + def test_intersection_collapse(): range1 = range(1, 2000, 2) From 60f9bb6a89281ce7a7484db1c66819972191ef02 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 28 Apr 2022 15:30:18 +0100 Subject: [PATCH 6/8] tests: Additional tests for covariance with different idls added. --- tests/obs_test.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index 8f32bd77..6dd460c9 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -778,6 +778,60 @@ def test_correlation_intersection_of_idls(): assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14) +def test_covariance_non_identical_objects(): + obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"]) + obs1.gamma_method() + obs2 = obs1 + 1e-18 + obs2.gamma_method() + assert obs1 == obs2 + assert obs1 is not obs2 + assert np.allclose(np.ones((2, 2)), pe.covariance([obs1, obs2], correlation=True), atol=1e-14) + + +def test_covariance_additional_non_overlapping_data(): + range1 = range(1, 20, 2) + + data2 = np.random.normal(0.0, 0.1, len(range1)) + + obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + obs2_a = pe.Obs([data2], ["ens"], idl=[range1]) + obs1.gamma_method() + obs2_a.gamma_method() + + corr1 = pe.covariance([obs1, obs2_a], correlation=True) + + added_data = np.random.normal(0.0, 0.1, len(range1)) + added_data -= np.mean(added_data) - np.mean(data2) + data2_extended = np.ravel([data2, added_data], 'F') + + obs2_b = pe.Obs([data2_extended], ["ens"]) + obs2_b.gamma_method() + + corr2 = pe.covariance([obs1, obs2_b], correlation=True) + + assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14) + + +def test_coavariance_reorder_non_overlapping_data(): + range1 = range(1, 20, 2) + range2 = range(1, 41, 2) + + obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + obs2_b = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2]) + obs1.gamma_method() + obs2_b.gamma_method() + + corr1 = pe.covariance([obs1, obs2_b], correlation=True) + + deltas = list(obs2_b.deltas['ens'][:len(range1)]) + sorted(obs2_b.deltas['ens'][len(range1):]) + obs2_a = pe.Obs([obs2_b.value + np.array(deltas)], ["ens"], idl=[range2]) + obs2_a.gamma_method() + + corr2 = pe.covariance([obs1, obs2_a], correlation=True) + + assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14) + + def test_empty_obs(): o = pe.Obs([np.random.rand(100)], ['test']) q = o + pe.Obs([], [], means=[]) From 9011adb0de58f5fbc132a9c5f926e5177f70c71c Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 19 May 2022 13:53:33 +0100 Subject: [PATCH 7/8] tests: additional test addedwhich checks that merge and intersction of idls agree for identical idls. --- tests/obs_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index 6dd460c9..82b5ffec 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -524,6 +524,12 @@ def test_intersection_idx(): for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]: assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids]) +def test_merge_intersection(): + for idl_list in [[range(1, 100), range(1, 100), range(1, 100)], + [range(4, 80, 6), range(4, 80, 6)], + [[0, 2, 8, 19, 205], [0, 2, 8, 19, 205]]]: + assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list) + def test_intersection_collapse(): range1 = range(1, 2000, 2) From bb0236a556109b43d6873cd0c988d268c6e9050b Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Thu, 19 May 2022 14:09:05 +0100 Subject: [PATCH 8/8] tests: test covariance vs numpy added. --- tests/obs_test.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/obs_test.py b/tests/obs_test.py index 82b5ffec..55a23abd 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -648,6 +648,26 @@ def test_covariance_is_variance(): assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1]) +def test_covariance_vs_numpy(): + N = 1078 + data1 = np.random.normal(2.5, 0.2, N) + data2 = np.random.normal(0.5, 0.08, N) + data3 = np.random.normal(-178, 5, N) + uncorr = np.row_stack([data1, data2, data3]) + corr = np.random.multivariate_normal([0.0, 17, -0.0487], [[1.0, 0.6, -0.22], [0.6, 0.8, 0.01], [-0.22, 0.01, 1.9]], N).T + + for X in [uncorr, corr]: + obs1 = pe.Obs([X[0]], ["ens1"]) + obs2 = pe.Obs([X[1]], ["ens1"]) + obs3 = pe.Obs([X[2]], ["ens1"]) + obs1.gamma_method(S=0.0) + obs2.gamma_method(S=0.0) + obs3.gamma_method(S=0.0) + pe_cov = pe.covariance([obs1, obs2, obs3]) + np_cov = np.cov(X) / N + assert np.allclose(pe_cov, np_cov, atol=1e-14) + + def test_covariance_symmetry(): value1 = np.random.normal(5, 10) dvalue1 = np.abs(np.random.normal(0, 1))