[Fix] Removed the possibility to create an Obs from data on several replica

2025-10-16 01:19:57 +02:00 · 2025-02-17 18:59:45 +01:00 · 2025-02-17 18:59:45 +01:00 · f2fb69a79d
commit f2fb69a79d
parent 6ed6ce6113
6 changed files with 67 additions and 29 deletions
--- a/pyerrors/input/dobs.py
+++ b/pyerrors/input/dobs.py
@ -529,7 +529,8 @@ def import_dobs_string(content, full_output=False, separator_insertion=True):
                deltas.append(repdeltas)
                idl.append(repidl)
-        res.append(Obs(deltas, obs_names, idl=idl))
+        obsmeans = [np.average(deltas[j]) for j in range(len(deltas))]
        res.append(Obs([np.array(deltas[j]) - obsmeans[j] for j in range(len(obsmeans))], obs_names, idl=idl, means=obsmeans))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@ -136,7 +136,8 @@ def create_json_string(ol, description='', indent=1):
            samples.append([np.nan] * len(value))
            names.append(key)
            idl.append(value)
-        my_obs = Obs(samples, names, idl)
+        my_obs = Obs(np.array(samples), names, idl, means=[np.nan for n in names])
        my_obs._value = np.nan
        my_obs._covobs = obs._covobs
        for name in obs._covobs:
            my_obs.names.append(name)
@ -331,7 +332,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
        cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))
        if od:
-            ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
+            r_offsets = [np.average([ddi[0] for ddi in di]) for di in od['deltas']]
            ret = Obs([np.array([ddi[0] for ddi in od['deltas'][i]]) - r_offsets[i] for i in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[0] for ro in r_offsets])
            ret._value = values[0]
        else:
            ret = Obs([], [], means=[])
@ -356,7 +358,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
        taglist = o.get('tag', layout * [None])
        for i in range(layout):
            if od:
-                ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
+                r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
                ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
                ret[-1]._value = values[i]
            else:
                ret.append(Obs([], [], means=[]))
@ -383,7 +386,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
        taglist = o.get('tag', N * [None])
        for i in range(N):
            if od:
-                ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
+                r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
                ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
                ret[-1]._value = values[i]
            else:
                ret.append(Obs([], [], means=[]))
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@ -82,6 +82,8 @@ class Obs:
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
                if len(set([o.split('|')[0] for o in names])) > 1:
                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
@ -1407,6 +1409,8 @@ def reweight(weight, obs, **kwargs):
            raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise ValueError('Error: Ensembles do not fit')
        if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
            raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
@ -1442,9 +1446,12 @@ def correlate(obs_a, obs_b):
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
-    Currently only works if ensembles are identical (this is not strictly necessary).
+    Only works if a single ensemble is present in the Obs.
    Currently only works if ensemble content is identical (this is not strictly necessary).
    """
    if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
        raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
    if sorted(obs_a.names) != sorted(obs_b.names):
        raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
@ -1755,7 +1762,11 @@ def import_bootstrap(boots, name, random_numbers):
 def merge_obs(list_of_obs):
-    """Combine all observables in list_of_obs into one new observable
+    """Combine all observables in list_of_obs into one new observable.
    This allows to merge Obs that have been computed on multiple replica
    of the same ensemble.
    If you like to merge Obs that are based on several ensembles, please
    average them yourself.
    Parameters
    ----------
--- a/tests/json_io_test.py
+++ b/tests/json_io_test.py
@ -337,7 +337,7 @@ def test_dobsio():
    tt4 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(1, 101, 1), range(2, 202, 2)])
-    ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt)]
+    ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt), o4.reweight(o4)]
    print(ol)
    fname = 'test_rw'
@ -362,9 +362,12 @@ def test_dobsio():
 def test_reconstruct_non_linear_r_obs(tmp_path):
-    to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
+    to = (
-                ["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
+        pe.Obs([np.random.rand(500), np.random.rand(500)],
-                idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
+                ["e|r1", "e|r2", ],
                idl=[range(1, 501), range(0, 500)])
        + pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
        )
    to = np.log(to ** 2) / to
    to.dump((tmp_path / "test_equality").as_posix())
    ro = pe.input.json.load_json((tmp_path / "test_equality").as_posix())
@ -372,9 +375,12 @@ def test_reconstruct_non_linear_r_obs(tmp_path):
 def test_reconstruct_non_linear_r_obs_list(tmp_path):
-    to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)],
+    to = (
-                ["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"],
+        pe.Obs([np.random.rand(500), np.random.rand(500)],
-                idl=[range(1, 501), range(0, 500), range(1, 999, 9)])
+                ["e|r1", "e|r2", ],
                idl=[range(1, 501), range(0, 500)])
        + pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
        )
    to = np.log(to ** 2) / to
    for to_list in [[to, to, to], np.array([to, to, to])]:
        pe.input.json.dump_to_json(to_list, (tmp_path / "test_equality_list").as_posix())
--- a/tests/linalg_test.py
+++ b/tests/linalg_test.py
@ -34,7 +34,7 @@ def test_matmul():
            my_list = []
            length = 100 + np.random.randint(200)
            for i in range(dim ** 2):
-                my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
+                my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
            my_array = const * np.array(my_list).reshape((dim, dim))
            tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
            for t, e in np.ndenumerate(tt):
@ -43,8 +43,8 @@ def test_matmul():
            my_list = []
            length = 100 + np.random.randint(200)
            for i in range(dim ** 2):
-                my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
+                my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
-                                       pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
+                                       pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
            my_array = np.array(my_list).reshape((dim, dim)) * const
            tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
            for t, e in np.ndenumerate(tt):
@ -151,7 +151,7 @@ def test_multi_dot():
        my_list = []
        length = 1000 + np.random.randint(200)
        for i in range(dim ** 2):
-            my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))
+            my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
        my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim))
        tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
        for t, e in np.ndenumerate(tt):
@ -160,8 +160,8 @@ def test_multi_dot():
        my_list = []
        length = 1000 + np.random.randint(200)
        for i in range(dim ** 2):
-            my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']),
+            my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
-                                   pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])))
+                                   pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
        my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov')
        tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
        for t, e in np.ndenumerate(tt):
@ -209,7 +209,7 @@ def test_irregular_matrix_inverse():
    for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]:
        irregular_array = []
        for i in range(dim ** 2):
-            irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)]))
+            irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl]) + pe.Obs([np.random.normal(0.25, 0.1, 10)], ['ens2'], idl=[range(1, 11)]))
        irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23')
        invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@ -333,7 +333,7 @@ def test_derived_observables():
 def test_multi_ens():
    names = ['A0', 'A1|r001', 'A1|r002']
-    test_obs = pe.Obs([np.random.rand(50), np.random.rand(50), np.random.rand(50)], names)
+    test_obs = pe.Obs([np.random.rand(50)], names[:1]) + pe.Obs([np.random.rand(50), np.random.rand(50)], names[1:])
    assert test_obs.e_names == ['A0', 'A1']
    assert test_obs.e_content['A0'] == ['A0']
    assert test_obs.e_content['A1'] == ['A1|r001', 'A1|r002']
@ -345,6 +345,9 @@ def test_multi_ens():
        ensembles.append(str(i))
    assert my_sum.e_names == sorted(ensembles)
    with pytest.raises(ValueError):
        test_obs = pe.Obs([np.random.rand(50), np.random.rand(50), np.random.rand(50)], names)
 def test_multi_ens2():
    names = ['ens', 'e', 'en', 'e|r010', 'E|er', 'ens|', 'Ens|34', 'ens|r548984654ez4e3t34terh']
@ -498,18 +501,25 @@ def test_reweighting():
    with pytest.raises(ValueError):
        pe.reweight(my_irregular_obs, [my_obs])
    my_merged_obs = my_obs + pe.Obs([np.random.rand(1000)], ['q'])
    with pytest.raises(ValueError):
        pe.reweight(my_merged_obs, [my_merged_obs])
 def test_merge_obs():
-    my_obs1 = pe.Obs([np.random.rand(100)], ['t'])
+    my_obs1 = pe.Obs([np.random.rand(100)], ['t|1'])
-    my_obs2 = pe.Obs([np.random.rand(100)], ['q'], idl=[range(1, 200, 2)])
+    my_obs2 = pe.Obs([np.random.rand(100)], ['t|2'], idl=[range(1, 200, 2)])
    merged = pe.merge_obs([my_obs1, my_obs2])
-    diff = merged - my_obs2 - my_obs1
+    diff = merged - (my_obs2 + my_obs1) / 2
-    assert diff == -(my_obs1.value + my_obs2.value) / 2
+    assert diff.value == 0
    with pytest.raises(ValueError):
        pe.merge_obs([my_obs1, my_obs1])
    my_covobs = pe.cov_Obs(1.0, 0.003, 'cov')
    with pytest.raises(ValueError):
        pe.merge_obs([my_obs1, my_covobs])
    my_obs3 = pe.Obs([np.random.rand(100)], ['q|2'], idl=[range(1, 200, 2)])
    with pytest.raises(ValueError):
        pe.merge_obs([my_obs1, my_obs3])
@ -542,6 +552,9 @@ def test_correlate():
    my_obs6 = pe.Obs([np.random.rand(100)], ['t'], idl=[range(5, 505, 5)])
    corr3 = pe.correlate(my_obs5, my_obs6)
    assert my_obs5.idl == corr3.idl
    my_obs7 = pe.Obs([np.random.rand(99)], ['q'])
    with pytest.raises(ValueError):
        pe.correlate(my_obs1, my_obs7)
    my_new_obs = pe.Obs([np.random.rand(100)], ['q3'])
    with pytest.raises(ValueError):
@ -681,14 +694,14 @@ def test_gamma_method_irregular():
    assert (a.dvalue - 5 * a.ddvalue < expe and expe < a.dvalue + 5 * a.ddvalue)
    arr2 = np.random.normal(1, .2, size=N)
-    afull = pe.Obs([arr, arr2], ['a1', 'a2'])
+    afull = pe.Obs([arr], ['a1']) + pe.Obs([arr2], ['a2'])
    configs = np.ones_like(arr2)
    for i in np.random.uniform(0, len(arr2), size=int(.8*N)):
        configs[int(i)] = 0
    zero_arr2 = [arr2[i] for i in range(len(arr2)) if not configs[i] == 0]
    idx2 = [i + 1 for i in range(len(configs)) if configs[i] == 1]
-    a = pe.Obs([zero_arr, zero_arr2], ['a1', 'a2'], idl=[idx, idx2])
+    a = pe.Obs([zero_arr], ['a1'], idl=[idx]) + pe.Obs([zero_arr2], ['a2'], idl=[idx2])
    afull.gamma_method()
    a.gamma_method()
@ -1022,7 +1035,7 @@ def test_correlation_intersection_of_idls():
 def test_covariance_non_identical_objects():
-    obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"])
+    obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000)], ["ens|r1", "ens|r2"]) + pe.Obs([np.random.normal(1.0, 0.1, 732)], ['ens2'])
    obs1.gamma_method()
    obs2 = obs1 + 1e-18
    obs2.gamma_method()
@ -1106,6 +1119,9 @@ def test_reweight_method():
    obs1 = pe.pseudo_Obs(0.2, 0.01, 'test')
    rw = pe.pseudo_Obs(0.999, 0.001, 'test')
    assert obs1.reweight(rw) == pe.reweight(rw, [obs1])[0]
    rw2 = pe.pseudo_Obs(0.999, 0.001, 'test2')
    with pytest.raises(ValueError):
        obs1.reweight(rw2)
 def test_jackknife():