From d5a766ee169e4663046d9329d6cbd1ffa1a097a3 Mon Sep 17 00:00:00 2001 From: Simon Kuberski Date: Fri, 21 Oct 2022 11:05:54 +0200 Subject: [PATCH] feat: Speed up covariance for irregular MC chains --- pyerrors/obs.py | 57 +++++++++++------------------------------------ tests/obs_test.py | 4 ++-- 2 files changed, 15 insertions(+), 46 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index e0027fec..b50cc9af 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1097,34 +1097,6 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx): return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) -def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): - """Collapse deltas defined on idx to the list of configs that is defined by new_idx. - If idx and new_idx are of type range, the smallest - common divisor of the step sizes is used as new step size. - - Parameters - ---------- - deltas : list - List of fluctuations - idx : list - List or range of configs on which the deltas are defined. - Has to be a subset of new_idx and has to be sorted in ascending order. - shape : list - Number of configs in idx. - new_idx : list - List of configs that defines the new range, has to be sorted in ascending order. - """ - - if type(idx) is range and type(new_idx) is range: - if idx == new_idx: - return deltas - ret = np.zeros(new_idx[-1] - new_idx[0] + 1) - for i in range(shape): - if idx[i] in new_idx: - ret[idx[i] - new_idx[0]] = deltas[i] - return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) - - def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): """Filter out all configurations with vanishing fluctuation such that they do not contribute to the error estimate anymore. Returns the new deltas and @@ -1355,20 +1327,17 @@ def _reduce_deltas(deltas, idx_old, idx_new): if type(idx_old) is range and type(idx_new) is range: if idx_old == idx_new: return deltas - shape = len(idx_new) - ret = np.zeros(shape) - oldpos = 0 - for i in range(shape): - pos = -1 - for j in range(oldpos, len(idx_old)): - if idx_old[j] == idx_new[i]: - pos = j - break - if pos < 0: - raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) - ret[i] = deltas[pos] - oldpos = pos - return np.array(ret) + # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical + try: + g = groupby([idx_old, idx_new]) + if next(g, True) and not next(g, False): + return deltas + except Exception: + pass + indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] + if len(indices) < len(idx_new): + raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') + return np.array(deltas)[indices] def reweight(weight, obs, **kwargs): @@ -1546,8 +1515,8 @@ def _covariance_element(obs1, obs2): """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): - deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) - deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) + deltas1 = _reduce_deltas(deltas1, idx1, new_idx) + deltas2 = _reduce_deltas(deltas2, idx2, new_idx) return np.sum(deltas1 * deltas2) if set(obs1.names).isdisjoint(set(obs2.names)): diff --git a/tests/obs_test.py b/tests/obs_test.py index 5c6565fe..8f5aa547 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -534,7 +534,7 @@ def test_merge_intersection(): assert pe.obs._merge_idx(idl_list) == pe.obs._intersection_idx(idl_list) -def test_intersection_collapse(): +def test_intersection_reduce(): range1 = range(1, 2000, 2) range2 = range(2, 2001, 8) @@ -542,7 +542,7 @@ def test_intersection_collapse(): obs_merge = obs1 + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2]) intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]]) - coll = pe.obs._collapse_deltas_for_merge(obs_merge.deltas["ens"], obs_merge.idl["ens"], len(obs_merge.idl["ens"]), range1) + coll = pe.obs._reduce_deltas(obs_merge.deltas["ens"], obs_merge.idl["ens"], range1) assert np.all(coll == obs1.deltas["ens"])