[Fix] Removed the possibility to create an Obs from data on several replica

This commit is contained in:
Simon Kuberski 2025-02-17 18:59:45 +01:00
parent 6ed6ce6113
commit f2fb69a79d
6 changed files with 67 additions and 29 deletions

View file

@ -529,7 +529,8 @@ def import_dobs_string(content, full_output=False, separator_insertion=True):
deltas.append(repdeltas) deltas.append(repdeltas)
idl.append(repidl) idl.append(repidl)
res.append(Obs(deltas, obs_names, idl=idl)) obsmeans = [np.average(deltas[j]) for j in range(len(deltas))]
res.append(Obs([np.array(deltas[j]) - obsmeans[j] for j in range(len(obsmeans))], obs_names, idl=idl, means=obsmeans))
res[-1]._value = mean[i] res[-1]._value = mean[i]
_check(len(e_names) == ne) _check(len(e_names) == ne)

View file

@ -136,7 +136,8 @@ def create_json_string(ol, description='', indent=1):
samples.append([np.nan] * len(value)) samples.append([np.nan] * len(value))
names.append(key) names.append(key)
idl.append(value) idl.append(value)
my_obs = Obs(samples, names, idl) my_obs = Obs(np.array(samples), names, idl, means=[np.nan for n in names])
my_obs._value = np.nan
my_obs._covobs = obs._covobs my_obs._covobs = obs._covobs
for name in obs._covobs: for name in obs._covobs:
my_obs.names.append(name) my_obs.names.append(name)
@ -331,7 +332,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) cd = _gen_covobsd_from_cdatad(o.get('cdata', {}))
if od: if od:
ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl']) r_offsets = [np.average([ddi[0] for ddi in di]) for di in od['deltas']]
ret = Obs([np.array([ddi[0] for ddi in od['deltas'][i]]) - r_offsets[i] for i in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[0] for ro in r_offsets])
ret._value = values[0] ret._value = values[0]
else: else:
ret = Obs([], [], means=[]) ret = Obs([], [], means=[])
@ -356,7 +358,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
taglist = o.get('tag', layout * [None]) taglist = o.get('tag', layout * [None])
for i in range(layout): for i in range(layout):
if od: if od:
ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])) r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
ret[-1]._value = values[i] ret[-1]._value = values[i]
else: else:
ret.append(Obs([], [], means=[])) ret.append(Obs([], [], means=[]))
@ -383,7 +386,8 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
taglist = o.get('tag', N * [None]) taglist = o.get('tag', N * [None])
for i in range(N): for i in range(N):
if od: if od:
ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])) r_offsets = np.array([np.average(di[:, i]) for di in od['deltas']])
ret.append(Obs([od['deltas'][j][:, i] - r_offsets[j] for j in range(len(od['deltas']))], od['names'], idl=od['idl'], means=[ro + values[i] for ro in r_offsets]))
ret[-1]._value = values[i] ret[-1]._value = values[i]
else: else:
ret.append(Obs([], [], means=[])) ret.append(Obs([], [], means=[]))

View file

@ -82,6 +82,8 @@ class Obs:
raise ValueError('Names are not unique.') raise ValueError('Names are not unique.')
if not all(isinstance(x, str) for x in names): if not all(isinstance(x, str) for x in names):
raise TypeError('All names have to be strings.') raise TypeError('All names have to be strings.')
if len(set([o.split('|')[0] for o in names])) > 1:
raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
else: else:
if not isinstance(names[0], str): if not isinstance(names[0], str):
raise TypeError('All names have to be strings.') raise TypeError('All names have to be strings.')
@ -1407,6 +1409,8 @@ def reweight(weight, obs, **kwargs):
raise ValueError('Error: Not possible to reweight an Obs that contains covobs!') raise ValueError('Error: Not possible to reweight an Obs that contains covobs!')
if not set(obs[i].names).issubset(weight.names): if not set(obs[i].names).issubset(weight.names):
raise ValueError('Error: Ensembles do not fit') raise ValueError('Error: Ensembles do not fit')
if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1:
raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.')
for name in obs[i].names: for name in obs[i].names:
if not set(obs[i].idl[name]).issubset(weight.idl[name]): if not set(obs[i].idl[name]).issubset(weight.idl[name]):
raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
@ -1442,9 +1446,12 @@ def correlate(obs_a, obs_b):
----- -----
Keep in mind to only correlate primary observables which have not been reweighted Keep in mind to only correlate primary observables which have not been reweighted
yet. The reweighting has to be applied after correlating the observables. yet. The reweighting has to be applied after correlating the observables.
Currently only works if ensembles are identical (this is not strictly necessary). Only works if a single ensemble is present in the Obs.
Currently only works if ensemble content is identical (this is not strictly necessary).
""" """
if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1:
raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.')
if sorted(obs_a.names) != sorted(obs_b.names): if sorted(obs_a.names) != sorted(obs_b.names):
raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
if len(obs_a.cov_names) or len(obs_b.cov_names): if len(obs_a.cov_names) or len(obs_b.cov_names):
@ -1755,7 +1762,11 @@ def import_bootstrap(boots, name, random_numbers):
def merge_obs(list_of_obs): def merge_obs(list_of_obs):
"""Combine all observables in list_of_obs into one new observable """Combine all observables in list_of_obs into one new observable.
This allows to merge Obs that have been computed on multiple replica
of the same ensemble.
If you like to merge Obs that are based on several ensembles, please
average them yourself.
Parameters Parameters
---------- ----------

View file

@ -337,7 +337,7 @@ def test_dobsio():
tt4 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(1, 101, 1), range(2, 202, 2)]) tt4 = pe.Obs([np.random.rand(100), np.random.rand(100)], ['t|r1', 't|r2'], idl=[range(1, 101, 1), range(2, 202, 2)])
ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt)] ol = [o2, o3, o4, do, o5, tt, tt4, np.log(tt4 / o5**2), np.exp(o5 + np.log(co3 / tt3 + o4) / tt), o4.reweight(o4)]
print(ol) print(ol)
fname = 'test_rw' fname = 'test_rw'
@ -362,9 +362,12 @@ def test_dobsio():
def test_reconstruct_non_linear_r_obs(tmp_path): def test_reconstruct_non_linear_r_obs(tmp_path):
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)], to = (
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], pe.Obs([np.random.rand(500), np.random.rand(500)],
idl=[range(1, 501), range(0, 500), range(1, 999, 9)]) ["e|r1", "e|r2", ],
idl=[range(1, 501), range(0, 500)])
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
)
to = np.log(to ** 2) / to to = np.log(to ** 2) / to
to.dump((tmp_path / "test_equality").as_posix()) to.dump((tmp_path / "test_equality").as_posix())
ro = pe.input.json.load_json((tmp_path / "test_equality").as_posix()) ro = pe.input.json.load_json((tmp_path / "test_equality").as_posix())
@ -372,9 +375,12 @@ def test_reconstruct_non_linear_r_obs(tmp_path):
def test_reconstruct_non_linear_r_obs_list(tmp_path): def test_reconstruct_non_linear_r_obs_list(tmp_path):
to = pe.Obs([np.random.rand(500), np.random.rand(500), np.random.rand(111)], to = (
["e|r1", "e|r2", "my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], pe.Obs([np.random.rand(500), np.random.rand(500)],
idl=[range(1, 501), range(0, 500), range(1, 999, 9)]) ["e|r1", "e|r2", ],
idl=[range(1, 501), range(0, 500)])
+ pe.Obs([np.random.rand(111)], ["my_new_ensemble_54^£$|8'[@124435%6^7&()~#"], idl=[range(1, 999, 9)])
)
to = np.log(to ** 2) / to to = np.log(to ** 2) / to
for to_list in [[to, to, to], np.array([to, to, to])]: for to_list in [[to, to, to], np.array([to, to, to])]:
pe.input.json.dump_to_json(to_list, (tmp_path / "test_equality_list").as_posix()) pe.input.json.dump_to_json(to_list, (tmp_path / "test_equality_list").as_posix())

View file

@ -34,7 +34,7 @@ def test_matmul():
my_list = [] my_list = []
length = 100 + np.random.randint(200) length = 100 + np.random.randint(200)
for i in range(dim ** 2): for i in range(dim ** 2):
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])) my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
my_array = const * np.array(my_list).reshape((dim, dim)) my_array = const * np.array(my_list).reshape((dim, dim))
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
for t, e in np.ndenumerate(tt): for t, e in np.ndenumerate(tt):
@ -43,8 +43,8 @@ def test_matmul():
my_list = [] my_list = []
length = 100 + np.random.randint(200) length = 100 + np.random.randint(200)
for i in range(dim ** 2): for i in range(dim ** 2):
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']), my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))) pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
my_array = np.array(my_list).reshape((dim, dim)) * const my_array = np.array(my_list).reshape((dim, dim)) * const
tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array
for t, e in np.ndenumerate(tt): for t, e in np.ndenumerate(tt):
@ -151,7 +151,7 @@ def test_multi_dot():
my_list = [] my_list = []
length = 1000 + np.random.randint(200) length = 1000 + np.random.randint(200)
for i in range(dim ** 2): for i in range(dim ** 2):
my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])) my_list.append(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']))
my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim)) my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim))
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
for t, e in np.ndenumerate(tt): for t, e in np.ndenumerate(tt):
@ -160,8 +160,8 @@ def test_multi_dot():
my_list = [] my_list = []
length = 1000 + np.random.randint(200) length = 1000 + np.random.randint(200)
for i in range(dim ** 2): for i in range(dim ** 2):
my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']), my_list.append(pe.CObs(pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2']),
pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))) pe.Obs([np.random.rand(length)], ['t1']) + pe.Obs([np.random.rand(length + 1)], ['t2'])))
my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov')
tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array
for t, e in np.ndenumerate(tt): for t, e in np.ndenumerate(tt):
@ -209,7 +209,7 @@ def test_irregular_matrix_inverse():
for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]: for idl in [range(8, 508, 10), range(250, 273), [2, 8, 19, 20, 78, 99, 828, 10548979]]:
irregular_array = [] irregular_array = []
for i in range(dim ** 2): for i in range(dim ** 2):
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)])) irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl]) + pe.Obs([np.random.normal(0.25, 0.1, 10)], ['ens2'], idl=[range(1, 11)]))
irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23') irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23')
invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T

View file

@ -333,7 +333,7 @@ def test_derived_observables():
def test_multi_ens(): def test_multi_ens():
names = ['A0', 'A1|r001', 'A1|r002'] names = ['A0', 'A1|r001', 'A1|r002']
test_obs = pe.Obs([np.random.rand(50), np.random.rand(50), np.random.rand(50)], names) test_obs = pe.Obs([np.random.rand(50)], names[:1]) + pe.Obs([np.random.rand(50), np.random.rand(50)], names[1:])
assert test_obs.e_names == ['A0', 'A1'] assert test_obs.e_names == ['A0', 'A1']
assert test_obs.e_content['A0'] == ['A0'] assert test_obs.e_content['A0'] == ['A0']
assert test_obs.e_content['A1'] == ['A1|r001', 'A1|r002'] assert test_obs.e_content['A1'] == ['A1|r001', 'A1|r002']
@ -345,6 +345,9 @@ def test_multi_ens():
ensembles.append(str(i)) ensembles.append(str(i))
assert my_sum.e_names == sorted(ensembles) assert my_sum.e_names == sorted(ensembles)
with pytest.raises(ValueError):
test_obs = pe.Obs([np.random.rand(50), np.random.rand(50), np.random.rand(50)], names)
def test_multi_ens2(): def test_multi_ens2():
names = ['ens', 'e', 'en', 'e|r010', 'E|er', 'ens|', 'Ens|34', 'ens|r548984654ez4e3t34terh'] names = ['ens', 'e', 'en', 'e|r010', 'E|er', 'ens|', 'Ens|34', 'ens|r548984654ez4e3t34terh']
@ -498,18 +501,25 @@ def test_reweighting():
with pytest.raises(ValueError): with pytest.raises(ValueError):
pe.reweight(my_irregular_obs, [my_obs]) pe.reweight(my_irregular_obs, [my_obs])
my_merged_obs = my_obs + pe.Obs([np.random.rand(1000)], ['q'])
with pytest.raises(ValueError):
pe.reweight(my_merged_obs, [my_merged_obs])
def test_merge_obs(): def test_merge_obs():
my_obs1 = pe.Obs([np.random.rand(100)], ['t']) my_obs1 = pe.Obs([np.random.rand(100)], ['t|1'])
my_obs2 = pe.Obs([np.random.rand(100)], ['q'], idl=[range(1, 200, 2)]) my_obs2 = pe.Obs([np.random.rand(100)], ['t|2'], idl=[range(1, 200, 2)])
merged = pe.merge_obs([my_obs1, my_obs2]) merged = pe.merge_obs([my_obs1, my_obs2])
diff = merged - my_obs2 - my_obs1 diff = merged - (my_obs2 + my_obs1) / 2
assert diff == -(my_obs1.value + my_obs2.value) / 2 assert diff.value == 0
with pytest.raises(ValueError): with pytest.raises(ValueError):
pe.merge_obs([my_obs1, my_obs1]) pe.merge_obs([my_obs1, my_obs1])
my_covobs = pe.cov_Obs(1.0, 0.003, 'cov') my_covobs = pe.cov_Obs(1.0, 0.003, 'cov')
with pytest.raises(ValueError): with pytest.raises(ValueError):
pe.merge_obs([my_obs1, my_covobs]) pe.merge_obs([my_obs1, my_covobs])
my_obs3 = pe.Obs([np.random.rand(100)], ['q|2'], idl=[range(1, 200, 2)])
with pytest.raises(ValueError):
pe.merge_obs([my_obs1, my_obs3])
@ -542,6 +552,9 @@ def test_correlate():
my_obs6 = pe.Obs([np.random.rand(100)], ['t'], idl=[range(5, 505, 5)]) my_obs6 = pe.Obs([np.random.rand(100)], ['t'], idl=[range(5, 505, 5)])
corr3 = pe.correlate(my_obs5, my_obs6) corr3 = pe.correlate(my_obs5, my_obs6)
assert my_obs5.idl == corr3.idl assert my_obs5.idl == corr3.idl
my_obs7 = pe.Obs([np.random.rand(99)], ['q'])
with pytest.raises(ValueError):
pe.correlate(my_obs1, my_obs7)
my_new_obs = pe.Obs([np.random.rand(100)], ['q3']) my_new_obs = pe.Obs([np.random.rand(100)], ['q3'])
with pytest.raises(ValueError): with pytest.raises(ValueError):
@ -681,14 +694,14 @@ def test_gamma_method_irregular():
assert (a.dvalue - 5 * a.ddvalue < expe and expe < a.dvalue + 5 * a.ddvalue) assert (a.dvalue - 5 * a.ddvalue < expe and expe < a.dvalue + 5 * a.ddvalue)
arr2 = np.random.normal(1, .2, size=N) arr2 = np.random.normal(1, .2, size=N)
afull = pe.Obs([arr, arr2], ['a1', 'a2']) afull = pe.Obs([arr], ['a1']) + pe.Obs([arr2], ['a2'])
configs = np.ones_like(arr2) configs = np.ones_like(arr2)
for i in np.random.uniform(0, len(arr2), size=int(.8*N)): for i in np.random.uniform(0, len(arr2), size=int(.8*N)):
configs[int(i)] = 0 configs[int(i)] = 0
zero_arr2 = [arr2[i] for i in range(len(arr2)) if not configs[i] == 0] zero_arr2 = [arr2[i] for i in range(len(arr2)) if not configs[i] == 0]
idx2 = [i + 1 for i in range(len(configs)) if configs[i] == 1] idx2 = [i + 1 for i in range(len(configs)) if configs[i] == 1]
a = pe.Obs([zero_arr, zero_arr2], ['a1', 'a2'], idl=[idx, idx2]) a = pe.Obs([zero_arr], ['a1'], idl=[idx]) + pe.Obs([zero_arr2], ['a2'], idl=[idx2])
afull.gamma_method() afull.gamma_method()
a.gamma_method() a.gamma_method()
@ -1022,7 +1035,7 @@ def test_correlation_intersection_of_idls():
def test_covariance_non_identical_objects(): def test_covariance_non_identical_objects():
obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"]) obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000)], ["ens|r1", "ens|r2"]) + pe.Obs([np.random.normal(1.0, 0.1, 732)], ['ens2'])
obs1.gamma_method() obs1.gamma_method()
obs2 = obs1 + 1e-18 obs2 = obs1 + 1e-18
obs2.gamma_method() obs2.gamma_method()
@ -1106,6 +1119,9 @@ def test_reweight_method():
obs1 = pe.pseudo_Obs(0.2, 0.01, 'test') obs1 = pe.pseudo_Obs(0.2, 0.01, 'test')
rw = pe.pseudo_Obs(0.999, 0.001, 'test') rw = pe.pseudo_Obs(0.999, 0.001, 'test')
assert obs1.reweight(rw) == pe.reweight(rw, [obs1])[0] assert obs1.reweight(rw) == pe.reweight(rw, [obs1])[0]
rw2 = pe.pseudo_Obs(0.999, 0.001, 'test2')
with pytest.raises(ValueError):
obs1.reweight(rw2)
def test_jackknife(): def test_jackknife():