Merge pull request #102 from fjosw/feature/covariance_idl2

Feature/covariance idl2
This commit is contained in:
Fabian Joswig 2022-05-26 15:01:35 +01:00 committed by GitHub
commit f02488208a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 202 additions and 9 deletions

View file

@ -1,5 +1,7 @@
import warnings import warnings
import pickle import pickle
from math import gcd
from functools import reduce
import numpy as np import numpy as np
import autograd.numpy as anp # Thinly-wrapped numpy import autograd.numpy as anp # Thinly-wrapped numpy
from autograd import jacobian from autograd import jacobian
@ -981,6 +983,39 @@ def _merge_idx(idl):
return sorted(set().union(*idl)) return sorted(set().union(*idl))
def _intersection_idx(idl):
"""Returns the intersection of all lists in idl as sorted list
Parameters
----------
idl : list
List of lists or ranges.
"""
def _lcm(*args):
"""Returns the lowest common multiple of args.
From python 3.9 onwards the math library contains an lcm function."""
return reduce(lambda a, b: a * b // gcd(a, b), args)
# Use groupby to efficiently check whether all elements of idl are identical
try:
g = groupby(idl)
if next(g, True) and not next(g, False):
return idl[0]
except Exception:
pass
if np.all([type(idx) is range for idx in idl]):
if len(set([idx[0] for idx in idl])) == 1:
idstart = max([idx.start for idx in idl])
idstop = min([idx.stop for idx in idl])
idstep = _lcm(*[idx.step for idx in idl])
return range(idstart, idstop, idstep)
return sorted(set.intersection(*[set(o) for o in idl]))
def _expand_deltas_for_merge(deltas, idx, shape, new_idx): def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
"""Expand deltas defined on idx to the list of configs that is defined by new_idx. """Expand deltas defined on idx to the list of configs that is defined by new_idx.
New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
@ -1008,6 +1043,34 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
def _collapse_deltas_for_merge(deltas, idx, shape, new_idx):
"""Collapse deltas defined on idx to the list of configs that is defined by new_idx.
If idx and new_idx are of type range, the smallest
common divisor of the step sizes is used as new step size.
Parameters
----------
deltas : list
List of fluctuations
idx : list
List or range of configs on which the deltas are defined.
Has to be a subset of new_idx and has to be sorted in ascending order.
shape : list
Number of configs in idx.
new_idx : list
List of configs that defines the new range, has to be sorted in ascending order.
"""
if type(idx) is range and type(new_idx) is range:
if idx == new_idx:
return deltas
ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
for i in range(shape):
if idx[i] in new_idx:
ret[idx[i] - new_idx[0]] = deltas[i]
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): def _filter_zeroes(deltas, idx, eps=Obs.filter_eps):
"""Filter out all configurations with vanishing fluctuation such that they do not """Filter out all configurations with vanishing fluctuation such that they do not
contribute to the error estimate anymore. Returns the new deltas and contribute to the error estimate anymore. Returns the new deltas and
@ -1429,8 +1492,8 @@ def _covariance_element(obs1, obs2):
"""Estimates the covariance of two Obs objects, neglecting autocorrelations.""" """Estimates the covariance of two Obs objects, neglecting autocorrelations."""
def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
return np.sum(deltas1 * deltas2) return np.sum(deltas1 * deltas2)
if set(obs1.names).isdisjoint(set(obs2.names)): if set(obs1.names).isdisjoint(set(obs2.names)):
@ -1450,29 +1513,30 @@ def _covariance_element(obs1, obs2):
for r_name in obs1.e_content[e_name]: for r_name in obs1.e_content[e_name]:
if r_name not in obs2.e_content[e_name]: if r_name not in obs2.e_content[e_name]:
continue continue
idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])
gamma = 0.0 gamma = 0.0
for r_name in obs1.e_content[e_name]: for r_name in obs1.e_content[e_name]:
if r_name not in obs2.e_content[e_name]: if r_name not in obs2.e_content[e_name]:
continue continue
if len(idl_d[r_name]) == 0:
continue
gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])
if gamma == 0.0: if gamma == 0.0:
continue continue
gamma_div = 0.0 gamma_div = 0.0
e_N = 0
for r_name in obs1.e_content[e_name]: for r_name in obs1.e_content[e_name]:
if r_name not in obs2.e_content[e_name]: if r_name not in obs2.e_content[e_name]:
continue continue
gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) if len(idl_d[r_name]) == 0:
e_N += len(idl_d[r_name]) continue
gamma /= max(gamma_div, 1.0) gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
gamma /= gamma_div
# Bias correction hep-lat/0306017 eq. (49) dvalue += gamma
dvalue += (1 + 1 / e_N) * gamma / e_N
for e_name in obs1.cov_names: for e_name in obs1.cov_names:

View file

@ -515,6 +515,35 @@ def test_merge_idx():
assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50) assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50)
def test_intersection_idx():
assert pe.obs._intersection_idx([range(1, 100), range(1, 100), range(1, 100)]) == range(1, 100)
assert pe.obs._intersection_idx([range(1, 100, 10), range(1, 100, 2)]) == range(1, 100, 10)
assert pe.obs._intersection_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 50)
assert pe.obs._intersection_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6050, 250)
for ids in [[list(range(1, 80, 3)), list(range(1, 100, 2))], [range(1, 80, 3), range(1, 100, 2), range(1, 100, 7)]]:
assert list(pe.obs._intersection_idx(ids)) == pe.obs._intersection_idx([list(o) for o in ids])
def test_merge_intersection():
for idl_list in [[range(1, 100), range(1, 100), range(1, 100)],
[range(4, 80, 6), range(4, 80, 6)],
[[0, 2, 8, 19, 205], [0, 2, 8, 19, 205]]]:
assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list)
def test_intersection_collapse():
range1 = range(1, 2000, 2)
range2 = range(2, 2001, 8)
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]])
coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1)
assert np.all(coll == obs1.deltas["ens"])
def test_irregular_error_propagation(): def test_irregular_error_propagation():
obs_list = [pe.Obs([np.random.rand(100)], ['t']), obs_list = [pe.Obs([np.random.rand(100)], ['t']),
pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]), pe.Obs([np.random.rand(50)], ['t'], idl=[range(1, 100, 2)]),
@ -619,6 +648,26 @@ def test_covariance_is_variance():
assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1]) assert np.isclose(test_obs.dvalue ** 2, pe.covariance([test_obs, test_obs])[0, 1])
def test_covariance_vs_numpy():
N = 1078
data1 = np.random.normal(2.5, 0.2, N)
data2 = np.random.normal(0.5, 0.08, N)
data3 = np.random.normal(-178, 5, N)
uncorr = np.row_stack([data1, data2, data3])
corr = np.random.multivariate_normal([0.0, 17, -0.0487], [[1.0, 0.6, -0.22], [0.6, 0.8, 0.01], [-0.22, 0.01, 1.9]], N).T
for X in [uncorr, corr]:
obs1 = pe.Obs([X[0]], ["ens1"])
obs2 = pe.Obs([X[1]], ["ens1"])
obs3 = pe.Obs([X[2]], ["ens1"])
obs1.gamma_method(S=0.0)
obs2.gamma_method(S=0.0)
obs3.gamma_method(S=0.0)
pe_cov = pe.covariance([obs1, obs2, obs3])
np_cov = np.cov(X) / N
assert np.allclose(pe_cov, np_cov, atol=1e-14)
def test_covariance_symmetry(): def test_covariance_symmetry():
value1 = np.random.normal(5, 10) value1 = np.random.normal(5, 10)
dvalue1 = np.abs(np.random.normal(0, 1)) dvalue1 = np.abs(np.random.normal(0, 1))
@ -729,6 +778,86 @@ def test_covariance_idl():
pe.covariance([obs1, obs2]) pe.covariance([obs1, obs2])
def test_correlation_intersection_of_idls():
range1 = range(1, 2000, 2)
range2 = range(2, 2001, 2)
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
obs2_a = 0.4 * pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1]) + 0.6 * obs1
obs1.gamma_method()
obs2_a.gamma_method()
cov1 = pe.covariance([obs1, obs2_a])
corr1 = pe.covariance([obs1, obs2_a], correlation=True)
obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
obs2_b.gamma_method()
cov2 = pe.covariance([obs1, obs2_b])
corr2 = pe.covariance([obs1, obs2_b], correlation=True)
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
assert cov1[0, 1] > cov2[0, 1]
obs2_c = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
obs2_c.gamma_method()
assert np.isclose(0, pe.covariance([obs1, obs2_c])[0, 1], atol=1e-14)
def test_covariance_non_identical_objects():
obs1 = pe.Obs([np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 1000), np.random.normal(1.0, 0.1, 732)], ["ens|r1", "ens|r2", "ens2"])
obs1.gamma_method()
obs2 = obs1 + 1e-18
obs2.gamma_method()
assert obs1 == obs2
assert obs1 is not obs2
assert np.allclose(np.ones((2, 2)), pe.covariance([obs1, obs2], correlation=True), atol=1e-14)
def test_covariance_additional_non_overlapping_data():
range1 = range(1, 20, 2)
data2 = np.random.normal(0.0, 0.1, len(range1))
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
obs2_a = pe.Obs([data2], ["ens"], idl=[range1])
obs1.gamma_method()
obs2_a.gamma_method()
corr1 = pe.covariance([obs1, obs2_a], correlation=True)
added_data = np.random.normal(0.0, 0.1, len(range1))
added_data -= np.mean(added_data) - np.mean(data2)
data2_extended = np.ravel([data2, added_data], 'F')
obs2_b = pe.Obs([data2_extended], ["ens"])
obs2_b.gamma_method()
corr2 = pe.covariance([obs1, obs2_b], correlation=True)
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
def test_coavariance_reorder_non_overlapping_data():
range1 = range(1, 20, 2)
range2 = range(1, 41, 2)
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
obs2_b = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
obs1.gamma_method()
obs2_b.gamma_method()
corr1 = pe.covariance([obs1, obs2_b], correlation=True)
deltas = list(obs2_b.deltas['ens'][:len(range1)]) + sorted(obs2_b.deltas['ens'][len(range1):])
obs2_a = pe.Obs([obs2_b.value + np.array(deltas)], ["ens"], idl=[range2])
obs2_a.gamma_method()
corr2 = pe.covariance([obs1, obs2_a], correlation=True)
assert np.isclose(corr1[0, 1], corr2[0, 1], atol=1e-14)
def test_empty_obs(): def test_empty_obs():
o = pe.Obs([np.random.rand(100)], ['test']) o = pe.Obs([np.random.rand(100)], ['test'])
q = o + pe.Obs([], [], means=[]) q = o + pe.Obs([], [], means=[])