mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-03-15 06:40:24 +01:00
Merge pull request #102 from fjosw/feature/covariance_idl2
Feature/covariance idl2
This commit is contained in:
commit
f02488208a
2 changed files with 202 additions and 9 deletions
|
@ -1,5 +1,7 @@
|
|||
import warnings
|
||||
import pickle
|
||||
from math import gcd
|
||||
from functools import reduce
|
||||
import numpy as np
|
||||
import autograd.numpy as anp # Thinly-wrapped numpy
|
||||
from autograd import jacobian
|
||||
|
@ -981,6 +983,39 @@ def _merge_idx(idl):
|
|||
return sorted(set().union(*idl))
|
||||
|
||||
|
||||
def _intersection_idx(idl):
|
||||
"""Returns the intersection of all lists in idl as sorted list
|
||||
|
||||
Parameters
|
||||
----------
|
||||
idl : list
|
||||
List of lists or ranges.
|
||||
"""
|
||||
|
||||
def _lcm(*args):
|
||||
"""Returns the lowest common multiple of args.
|
||||
|
||||
From python 3.9 onwards the math library contains an lcm function."""
|
||||
return reduce(lambda a, b: a * b // gcd(a, b), args)
|
||||
|
||||
# Use groupby to efficiently check whether all elements of idl are identical
|
||||
try:
|
||||
g = groupby(idl)
|
||||
if next(g, True) and not next(g, False):
|
||||
return idl[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if np.all([type(idx) is range for idx in idl]):
|
||||
if len(set([idx[0] for idx in idl])) == 1:
|
||||
idstart = max([idx.start for idx in idl])
|
||||
idstop = min([idx.stop for idx in idl])
|
||||
idstep = _lcm(*[idx.step for idx in idl])
|
||||
return range(idstart, idstop, idstep)
|
||||
|
||||
return sorted(set.intersection(*[set(o) for o in idl]))
|
||||
|
||||
|
||||
def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
|
||||
"""Expand deltas defined on idx to the list of configs that is defined by new_idx.
|
||||
New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
|
||||
|
@ -1008,6 +1043,34 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
|
|||
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
|
||||
|
||||
|
||||
def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
|
||||
"""Collapse deltas defined on idx to the list of configs that is defined by new_idx.
|
||||
If idx and new_idx are of type range, the smallest
|
||||
common divisor of the step sizes is used as new step size.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
deltas : list
|
||||
List of fluctuations
|
||||
idx : list
|
||||
List or range of configs on which the deltas are defined.
|
||||
Has to be a subset of new_idx and has to be sorted in ascending order.
|
||||
shape : list
|
||||
Number of configs in idx.
|
||||
new_idx : list
|
||||
List of configs that defines the new range, has to be sorted in ascending order.
|
||||
"""
|
||||
|
||||
if type(idx) is range and type(new_idx) is range:
|
||||
if idx == new_idx:
|
||||
return deltas
|
||||
ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
|
||||
for i in range(shape):
|
||||
if idx[i] in new_idx:
|
||||
ret[idx[i] - new_idx[0]] = deltas[i]
|
||||
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
|
||||
|
||||
|
||||
def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
|
||||
"""Filter out all configurations with vanishing fluctuation such that they do not
|
||||
contribute to the error estimate anymore. Returns the new deltas and
|
||||
|
@ -1429,8 +1492,8 @@ def _covariance_element(obs1, obs2):
|
|||
"""Estimates the covariance of two Obs objects, neglecting autocorrelations."""
|
||||
|
||||
def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
|
||||
deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
|
||||
deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
|
||||
deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
|
||||
deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
|
||||
return np.sum(deltas1 * deltas2)
|
||||
|
||||
if set(obs1.names).isdisjoint(set(obs2.names)):
|
||||
|
@ -1450,29 +1513,30 @@ def _covariance_element(obs1, obs2):
|
|||
for r_name in obs1.e_content[e_name]:
|
||||
if r_name not in obs2.e_content[e_name]:
|
||||
continue
|
||||
idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]])
|
||||
idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
|
||||
|
||||
gamma = 0.0
|
||||
|
||||
for r_name in obs1.e_content[e_name]:
|
||||
if r_name not in obs2.e_content[e_name]:
|
||||
continue
|
||||
if len(idl_d[r_name]) == 0:
|
||||
continue
|
||||
gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
|
||||
|
||||
if gamma == 0.0:
|
||||
continue
|
||||
|
||||
gamma_div = 0.0
|
||||
e_N = 0
|
||||
for r_name in obs1.e_content[e_name]:
|
||||
if r_name not in obs2.e_content[e_name]:
|
||||
continue
|
||||
gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
|
||||
e_N += len(idl_d[r_name])
|
||||
gamma /= max(gamma_div, 1.0)
|
||||
if len(idl_d[r_name]) == 0:
|
||||
continue
|
||||
gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
|
||||
gamma /= gamma_div
|
||||
|
||||
# Bias correction hep-lat/0306017 eq. (49)
|
||||
dvalue += (1 + 1 / e_N) * gamma / e_N
|
||||
dvalue += gamma
|
||||
|
||||
for e_name in obs1.cov_names:
|
||||
|
||||
|
|
|
@ -515,6 +515,35 @@ def test_merge_idx():
|
|||
assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50)
|
||||
|
||||
|
||||
def test_intersection_idx():
|
||||
assert pe.obs._intersection_idx([range(1, 100), range(1, 100), range(1, 100)]) == range(1, 100)
|
||||
assert pe.obs._intersection_idx([range(1, 100, 10), range(1, 100, 2)]) == range(1, 100, 10)
|
||||
assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50)
|
||||
assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250)
|
||||
|
||||
for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]:
|
||||
assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids])
|
||||
|
||||
def test_merge_intersection():
|
||||
for idl_list in [[range(1, 100), range(1, 100), range(1, 100)],
|
||||
[range(4, 80, 6), range(4, 80, 6)],
|
||||
[[0, 2, 8, 19, 205], [0, 2, 8, 19, 205]]]:
|
||||
assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list)
|
||||
|
||||
|
||||
def test_intersection_collapse():
|
||||
range1 = range(1, 2000, 2)
|
||||
range2 = range(2, 2001, 8)
|
||||
|
||||
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
|
||||
obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
|
||||
|
||||
intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]])
|
||||
coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1)
|
||||
|
||||
assert np.all(coll == obs1.deltas["ens"])
|
||||
|
||||
|
||||
def test_irregular_error_propagation():
|
||||
obs_list = [pe.Obs([np.random.rand(100)], ['t']),
|
||||
pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]),
|
||||
|
@ -619,6 +648,26 @@ def test_covariance_is_variance():
|
|||
assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1])
|
||||
|
||||
|
||||
def test_covariance_vs_numpy():
|
||||
N = 1078
|
||||
data1 = np.random.normal(2.5, 0.2, N)
|
||||
data2 = np.random.normal(0.5, 0.08, N)
|
||||
data3 = np.random.normal(-178, 5, N)
|
||||
uncorr = np.row_stack([data1, data2, data3])
|
||||
corr = np.random.multivariate_normal([0.0, 17, -0.0487], [[1.0, 0.6, -0.22], [0.6, 0.8, 0.01], [-0.22, 0.01, 1.9]], N).T
|
||||
|
||||
for X in [uncorr, corr]:
|
||||
obs1 = pe.Obs([X[0]], ["ens1"])
|
||||
obs2 = pe.Obs([X[1]], ["ens1"])
|
||||
obs3 = pe.Obs([X[2]], ["ens1"])
|
||||
obs1.gamma_method(S=0.0)
|
||||
obs2.gamma_method(S=0.0)
|
||||
obs3.gamma_method(S=0.0)
|
||||
pe_cov = pe.covariance([obs1, obs2, obs3])
|
||||
np_cov = np.cov(X) / N
|
||||
assert np.allclose(pe_cov, np_cov, atol=1e-14)
|
||||
|
||||
|
||||
def test_covariance_symmetry():
|
||||
value1 = np.random.normal(5, 10)
|
||||
dvalue1 = np.abs(np.random.normal(0, 1))
|
||||
|
@ -729,6 +778,86 @@ def test_covariance_idl():
|
|||
pe.covariance([obs1, obs2])
|
||||
|
||||
|
||||
def test_correlation_intersection_of_idls():
|
||||
range1 = range(1, 2000, 2)
|
||||
range2 = range(2, 2001, 2)
|
||||
|
||||
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
|
||||
obs2_a = 0.4 * pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + 0.6 * obs1
|
||||
obs1.gamma_method()
|
||||
obs2_a.gamma_method()
|
||||
|
||||
cov1 = pe.covariance([obs1, obs2_a])
|
||||
corr1 = pe.covariance([obs1, obs2_a], correlation=True)
|
||||
|
||||
obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
|
||||
obs2_b.gamma_method()
|
||||
|
||||
cov2 = pe.covariance([obs1, obs2_b])
|
||||
corr2 = pe.covariance([obs1, obs2_b], correlation=True)
|
||||
|
||||
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
|
||||
assert cov1[0, 1] > cov2[0, 1]
|
||||
|
||||
obs2_c = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
|
||||
obs2_c.gamma_method()
|
||||
assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14)
|
||||
|
||||
|
||||
def test_covariance_non_identical_objects():
|
||||
obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"])
|
||||
obs1.gamma_method()
|
||||
obs2 = obs1 + 1e-18
|
||||
obs2.gamma_method()
|
||||
assert obs1 == obs2
|
||||
assert obs1 is not obs2
|
||||
assert np.allclose(np.ones((2, 2)), pe.covariance([obs1, obs2], correlation=True), atol=1e-14)
|
||||
|
||||
|
||||
def test_covariance_additional_non_overlapping_data():
|
||||
range1 = range(1, 20, 2)
|
||||
|
||||
data2 = np.random.normal(0.0, 0.1, len(range1))
|
||||
|
||||
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
|
||||
obs2_a = pe.Obs([data2], ["ens"], idl=[range1])
|
||||
obs1.gamma_method()
|
||||
obs2_a.gamma_method()
|
||||
|
||||
corr1 = pe.covariance([obs1, obs2_a], correlation=True)
|
||||
|
||||
added_data = np.random.normal(0.0, 0.1, len(range1))
|
||||
added_data -= np.mean(added_data) - np.mean(data2)
|
||||
data2_extended = np.ravel([data2, added_data], 'F')
|
||||
|
||||
obs2_b = pe.Obs([data2_extended], ["ens"])
|
||||
obs2_b.gamma_method()
|
||||
|
||||
corr2 = pe.covariance([obs1, obs2_b], correlation=True)
|
||||
|
||||
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
|
||||
|
||||
|
||||
def test_coavariance_reorder_non_overlapping_data():
|
||||
range1 = range(1, 20, 2)
|
||||
range2 = range(1, 41, 2)
|
||||
|
||||
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
|
||||
obs2_b = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
|
||||
obs1.gamma_method()
|
||||
obs2_b.gamma_method()
|
||||
|
||||
corr1 = pe.covariance([obs1, obs2_b], correlation=True)
|
||||
|
||||
deltas = list(obs2_b.deltas['ens'][:len(range1)]) + sorted(obs2_b.deltas['ens'][len(range1):])
|
||||
obs2_a = pe.Obs([obs2_b.value + np.array(deltas)], ["ens"], idl=[range2])
|
||||
obs2_a.gamma_method()
|
||||
|
||||
corr2 = pe.covariance([obs1, obs2_a], correlation=True)
|
||||
|
||||
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
|
||||
|
||||
|
||||
def test_empty_obs():
|
||||
o = pe.Obs([np.random.rand(100)], ['test'])
|
||||
q = o + pe.Obs([], [], means=[])
|
||||
|
|
Loading…
Add table
Reference in a new issue