From 934d0912498f9951d34b803b8cab223b91f0d1cd Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Fri, 8 Apr 2022 11:14:58 +0100
Subject: [PATCH 1/8] feat: _intersection_idx and _collapse_deltas_for_merge
 together with tests added.

---
 pyerrors/obs.py   | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/obs_test.py | 20 +++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index b4eb6469..f04b96c9 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -972,6 +972,33 @@ def _merge_idx(idl):
     return sorted(set().union(*idl))
 
 
+def _intersection_idx(idl):
+    """Returns the intersection of all lists in idl as sorted list
+
+    Parameters
+    ----------
+    idl : list
+        List of lists or ranges.
+    """
+
+    # Use groupby to efficiently check whether all elements of idl are identical
+    try:
+        g = groupby(idl)
+        if next(g, True) and not next(g, False):
+            return idl[0]
+    except Exception:
+        pass
+
+    if np.all([type(idx) is range for idx in idl]):
+        if len(set([idx[0] for idx in idl])) == 1:
+            idstart = max([idx.start for idx in idl])
+            idstop = min([idx.stop for idx in idl])
+            idstep = max([idx.step for idx in idl])
+            return range(idstart, idstop, idstep)
+
+    return sorted(set().intersection(*idl))
+
+
 def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
     """Expand deltas defined on idx to the list of configs that is defined by new_idx.
        New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
@@ -999,6 +1026,34 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
     return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
 
 
+def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
+    """Collapse deltas defined on idx to the list of configs that is defined by new_idx.
+       If idx and new_idx are of type range, the smallest
+       common divisor of the step sizes is used as new step size.
+
+    Parameters
+    ----------
+    deltas : list
+        List of fluctuations
+    idx : list
+        List or range of configs on which the deltas are defined.
+        Has to be a subset of new_idx and has to be sorted in ascending order.
+    shape : list
+        Number of configs in idx.
+    new_idx : list
+        List of configs that defines the new range, has to be sorted in ascending order.
+    """
+
+    if type(idx) is range and type(new_idx) is range:
+        if idx == new_idx:
+            return deltas
+    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
+    for i in range(shape):
+        if idx[i] in new_idx:
+            ret[idx[i] - new_idx[0]] = deltas[i]
+    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
+
+
 def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
     """Filter out all configurations with vanishing fluctuation such that they do not
        contribute to the error estimate anymore. Returns the new deltas and
diff --git a/tests/obs_test.py b/tests/obs_test.py
index 381ecdcb..8ed497d6 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -515,6 +515,26 @@ def test_merge_idx():
     assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50)
 
 
+def test_intersection_idx():
+    assert pe.obs._intersection_idx([range(1, 100), range(1, 100), range(1, 100)]) == range(1, 100)
+    assert pe.obs._intersection_idx([range(1, 100, 10), range(1, 100, 2)]) == range(1, 100, 10)
+    assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50)
+    assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250)
+
+
+def test_intersection_collapse():
+    range1 = range(1, 2000, 2)
+    range2 = range(2, 2001, 8)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+
+    intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]])
+    coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1)
+
+    assert np.all(coll == obs1.deltas["ens"])
+
+
 def test_irregular_error_propagation():
     obs_list = [pe.Obs([np.random.rand(100)], ['t']),
                 pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]),

From eacc9b19a3b1f1f82e0f077698ce12031d27ba9a Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Fri, 8 Apr 2022 11:44:01 +0100
Subject: [PATCH 2/8] feat: the correlation for two observables with different
 idls is now based on the intersection of two instead of the union. Tests
 added.

---
 pyerrors/obs.py   | 19 ++++++++++---------
 tests/obs_test.py | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index f04b96c9..7e1ed9e0 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -1472,8 +1472,8 @@ def _covariance_element(obs1, obs2):
     """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
 
     def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
-        deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
-        deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
+        deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
+        deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
         return np.sum(deltas1 * deltas2)
 
     if set(obs1.names).isdisjoint(set(obs2.names)):
@@ -1493,29 +1493,30 @@ def _covariance_element(obs1, obs2):
         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
-            idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])
+            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
 
         gamma = 0.0
 
         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
+            if len(idl_d[r_name]) == 0:
+                continue
             gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
 
         if gamma == 0.0:
             continue
 
         gamma_div = 0.0
-        e_N = 0
         for r_name in obs1.e_content[e_name]:
             if r_name not in obs2.e_content[e_name]:
                 continue
-            gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
-            e_N += len(idl_d[r_name])
-        gamma /= max(gamma_div, 1.0)
+            if len(idl_d[r_name]) == 0:
+                continue
+            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
+        gamma /= gamma_div
 
-        # Bias correction hep-lat/0306017 eq. (49)
-        dvalue += (1 + 1 / e_N) * gamma / e_N
+        dvalue += gamma
 
     for e_name in obs1.cov_names:
 
diff --git a/tests/obs_test.py b/tests/obs_test.py
index 8ed497d6..c2d161e4 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -749,6 +749,32 @@ def test_covariance_idl():
     pe.covariance([obs1, obs2])
 
 
+def test_correlation_intersection_of_idls():
+    range1 = range(1, 2000, 2)
+    range2 = range(2, 2001, 2)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_a = 0.4 * pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + 0.6 * obs1
+    obs1.gamma_method()
+    obs2_a.gamma_method()
+
+    cov1 = pe.covariance([obs1, obs2_a])
+    corr1 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs2_b.gamma_method()
+
+    cov2 = pe.covariance([obs1, obs2_b])
+    corr2 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+    assert cov1[0, 1] > cov2[0, 1]
+
+    obs2_c = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs2_c.gamma_method()
+    assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14)
+
+
 def test_empty_obs():
     o = pe.Obs([np.random.rand(100)], ['test'])
     q = o + pe.Obs([], [], means=[])

From 99c98aeb9e3752529232c9db4bc873d33a684293 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 28 Apr 2022 11:25:17 +0100
Subject: [PATCH 3/8] fix: bug in obs._intersection_idx fixed.

---
 pyerrors/obs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index 7e1ed9e0..3437019a 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -996,7 +996,7 @@ def _intersection_idx(idl):
             idstep = max([idx.step for idx in idl])
             return range(idstart, idstop, idstep)
 
-    return sorted(set().intersection(*idl))
+    return sorted(set.intersection(*[set(o) for o in tt]))
 
 
 def _expand_deltas_for_merge(deltas, idx, shape, new_idx):

From c7f17396e57418ec44626c6030bd829d43c75ea8 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 28 Apr 2022 14:07:42 +0100
Subject: [PATCH 4/8] fix: Another bug in range detection of
 obs._intersection_idx fixed.

---
 pyerrors/obs.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/pyerrors/obs.py b/pyerrors/obs.py
index 3437019a..28c5c6be 100644
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@@ -1,5 +1,7 @@
 import warnings
 import pickle
+from math import gcd
+from functools import reduce
 import numpy as np
 import autograd.numpy as anp  # Thinly-wrapped numpy
 from autograd import jacobian
@@ -981,6 +983,12 @@ def _intersection_idx(idl):
         List of lists or ranges.
     """
 
+    def _lcm(*args):
+        """Returns the lowest common multiple of args.
+
+        From python 3.9 onwards the math library contains an lcm function."""
+        return reduce(lambda a, b: a * b // gcd(a, b), args)
+
     # Use groupby to efficiently check whether all elements of idl are identical
     try:
         g = groupby(idl)
@@ -993,10 +1001,10 @@ def _intersection_idx(idl):
         if len(set([idx[0] for idx in idl])) == 1:
             idstart = max([idx.start for idx in idl])
             idstop = min([idx.stop for idx in idl])
-            idstep = max([idx.step for idx in idl])
+            idstep = _lcm(*[idx.step for idx in idl])
             return range(idstart, idstop, idstep)
 
-    return sorted(set.intersection(*[set(o) for o in tt]))
+    return sorted(set.intersection(*[set(o) for o in idl]))
 
 
 def _expand_deltas_for_merge(deltas, idx, shape, new_idx):

From 217d310ca473bcc7ef050987b4afb78926be83cf Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 28 Apr 2022 14:13:29 +0100
Subject: [PATCH 5/8] tests: test for _intersection_idx extended.

---
 tests/obs_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/obs_test.py b/tests/obs_test.py
index c2d161e4..8f32bd77 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -521,6 +521,9 @@ def test_intersection_idx():
     assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50)
     assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250)
 
+    for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]:
+        assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids])
+
 
 def test_intersection_collapse():
     range1 = range(1, 2000, 2)

From 60f9bb6a89281ce7a7484db1c66819972191ef02 Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 28 Apr 2022 15:30:18 +0100
Subject: [PATCH 6/8] tests: Additional tests for covariance with different
 idls added.

---
 tests/obs_test.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/obs_test.py b/tests/obs_test.py
index 8f32bd77..6dd460c9 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -778,6 +778,60 @@ def test_correlation_intersection_of_idls():
     assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14)
 
 
+def test_covariance_non_identical_objects():
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"])
+    obs1.gamma_method()
+    obs2 = obs1 + 1e-18
+    obs2.gamma_method()
+    assert obs1 == obs2
+    assert obs1 is not obs2
+    assert np.allclose(np.ones((2, 2)), pe.covariance([obs1, obs2], correlation=True), atol=1e-14)
+
+
+def test_covariance_additional_non_overlapping_data():
+    range1 = range(1, 20, 2)
+
+    data2 = np.random.normal(0.0, 0.1, len(range1))
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_a = pe.Obs([data2], ["ens"], idl=[range1])
+    obs1.gamma_method()
+    obs2_a.gamma_method()
+
+    corr1 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    added_data = np.random.normal(0.0, 0.1, len(range1))
+    added_data -= np.mean(added_data) - np.mean(data2)
+    data2_extended = np.ravel([data2, added_data], 'F')
+
+    obs2_b = pe.Obs([data2_extended], ["ens"])
+    obs2_b.gamma_method()
+
+    corr2 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+
+
+def test_coavariance_reorder_non_overlapping_data():
+    range1 = range(1, 20, 2)
+    range2 = range(1, 41, 2)
+
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
+    obs2_b = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
+    obs1.gamma_method()
+    obs2_b.gamma_method()
+
+    corr1 = pe.covariance([obs1, obs2_b], correlation=True)
+
+    deltas = list(obs2_b.deltas['ens'][:len(range1)]) + sorted(obs2_b.deltas['ens'][len(range1):])
+    obs2_a = pe.Obs([obs2_b.value + np.array(deltas)], ["ens"], idl=[range2])
+    obs2_a.gamma_method()
+
+    corr2 = pe.covariance([obs1, obs2_a], correlation=True)
+
+    assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
+
+
 def test_empty_obs():
     o = pe.Obs([np.random.rand(100)], ['test'])
     q = o + pe.Obs([], [], means=[])

From 9011adb0de58f5fbc132a9c5f926e5177f70c71c Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 19 May 2022 13:53:33 +0100
Subject: [PATCH 7/8] tests: additional test addedwhich checks that merge and
 intersction of idls agree for identical idls.

---
 tests/obs_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/obs_test.py b/tests/obs_test.py
index 6dd460c9..82b5ffec 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -524,6 +524,12 @@ def test_intersection_idx():
     for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]:
         assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids])
 
+def test_merge_intersection():
+    for idl_list in [[range(1, 100), range(1, 100), range(1, 100)],
+                     [range(4, 80, 6), range(4, 80, 6)],
+                     [[0, 2, 8, 19, 205], [0, 2, 8, 19, 205]]]:
+        assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list)
+
 
 def test_intersection_collapse():
     range1 = range(1, 2000, 2)

From bb0236a556109b43d6873cd0c988d268c6e9050b Mon Sep 17 00:00:00 2001
From: Fabian Joswig <fabian.joswig@ed.ac.uk>
Date: Thu, 19 May 2022 14:09:05 +0100
Subject: [PATCH 8/8] tests: test covariance vs numpy added.

---
 tests/obs_test.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tests/obs_test.py b/tests/obs_test.py
index 82b5ffec..55a23abd 100644
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@@ -648,6 +648,26 @@ def test_covariance_is_variance():
     assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1])
 
 
+def test_covariance_vs_numpy():
+    N = 1078
+    data1 = np.random.normal(2.5, 0.2, N)
+    data2 = np.random.normal(0.5, 0.08, N)
+    data3 = np.random.normal(-178, 5, N)
+    uncorr = np.row_stack([data1, data2, data3])
+    corr = np.random.multivariate_normal([0.0, 17, -0.0487], [[1.0, 0.6, -0.22], [0.6, 0.8, 0.01], [-0.22, 0.01, 1.9]], N).T
+
+    for X in [uncorr, corr]:
+        obs1 = pe.Obs([X[0]], ["ens1"])
+        obs2 = pe.Obs([X[1]], ["ens1"])
+        obs3 = pe.Obs([X[2]], ["ens1"])
+        obs1.gamma_method(S=0.0)
+        obs2.gamma_method(S=0.0)
+        obs3.gamma_method(S=0.0)
+        pe_cov = pe.covariance([obs1, obs2, obs3])
+        np_cov = np.cov(X) / N
+        assert np.allclose(pe_cov, np_cov, atol=1e-14)
+
+
 def test_covariance_symmetry():
     value1 = np.random.normal(5, 10)
     dvalue1 = np.abs(np.random.normal(0, 1))