From 1db59a9fdc91d8117efd1b86fbacaf42c976b060 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 14:34:48 +0000 Subject: [PATCH 1/7] feat: derived_observable now uses covobs when an input is not an Obs. This should result in a small speedup for all operations as one iteration over all data can be dropped. --- pyerrors/obs.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index c27670ed..6188dc4f 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1053,17 +1053,9 @@ def derived_observable(func, data, array_mode=False, **kwargs): raveled_data = data.ravel() # Workaround for matrix operations containing non Obs data - # TODO: Find more elegant solution here. - for i_data in raveled_data: - if isinstance(i_data, Obs): - first_name = i_data.names[0] - first_shape = i_data.shape[first_name] - first_idl = i_data.idl[first_name] - break - for i in range(len(raveled_data)): if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl]) + raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "~#dummy_data#~") allcov = {} for o in raveled_data: From 5ced94e08635c5e991853d577ced10f2b66b10ee Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 15:00:44 +0000 Subject: [PATCH 2/7] feat: check for non Obs objects in derived observable optimized and only performed in array mode --- pyerrors/obs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 6188dc4f..7b13d8dd 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1053,9 +1053,11 @@ def derived_observable(func, data, array_mode=False, **kwargs): raveled_data = data.ravel() # Workaround for matrix operations containing non Obs data - for i in range(len(raveled_data)): - if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "~#dummy_data#~") + if array_mode is True: + if not all(isinstance(x, Obs) for x in raveled_data): + for i in range(len(raveled_data)): + if isinstance(raveled_data[i], (int, float)): + raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_entry###") allcov = {} for o in raveled_data: From 3f0040a81545b7ca0847d0718d749fe49db1253d Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 15:09:40 +0000 Subject: [PATCH 3/7] refactor: generation of new r_values in derived_observable simplified. --- pyerrors/obs.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 7b13d8dd..681a398b 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1079,9 +1079,9 @@ def derived_observable(func, data, array_mode=False, **kwargs): for name in new_sample_names: idl = [] for i_data in raveled_data: - tmp = i_data.idl.get(name) - if tmp is not None: - idl.append(tmp) + tmp_idl = i_data.idl.get(name) + if tmp_idl is not None: + idl.append(tmp_idl) new_idl_d[name] = _merge_idx(idl) if not is_merged[name]: is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) @@ -1101,10 +1101,7 @@ def derived_observable(func, data, array_mode=False, **kwargs): for name in new_sample_names: tmp_values = np.zeros(n_obs) for i, item in enumerate(raveled_data): - tmp = item.r_values.get(name) - if tmp is None: - tmp = item.value - tmp_values[i] = tmp + tmp_values[i] = item.r_values.get(name, item.value) if multi > 0: tmp_values = np.array(tmp_values).reshape(data.shape) new_r_values[name] = func(tmp_values, **kwargs) From 140268c1c93190d1e8754ad276355e95249e833b Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 15:17:32 +0000 Subject: [PATCH 4/7] refactor: two loops over new_sample_names merged. --- pyerrors/obs.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 681a398b..60441456 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1075,16 +1075,6 @@ def derived_observable(func, data, array_mode=False, **kwargs): is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 - new_idl_d = {} - for name in new_sample_names: - idl = [] - for i_data in raveled_data: - tmp_idl = i_data.idl.get(name) - if tmp_idl is not None: - idl.append(tmp_idl) - new_idl_d[name] = _merge_idx(idl) - if not is_merged[name]: - is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) if data.ndim == 1: values = np.array([o.value for o in data]) @@ -1098,13 +1088,21 @@ def derived_observable(func, data, array_mode=False, **kwargs): multi = 1 new_r_values = {} + new_idl_d = {} for name in new_sample_names: + idl = [] tmp_values = np.zeros(n_obs) for i, item in enumerate(raveled_data): tmp_values[i] = item.r_values.get(name, item.value) + tmp_idl = item.idl.get(name) + if tmp_idl is not None: + idl.append(tmp_idl) if multi > 0: tmp_values = np.array(tmp_values).reshape(data.shape) new_r_values[name] = func(tmp_values, **kwargs) + new_idl_d[name] = _merge_idx(idl) + if not is_merged[name]: + is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) if 'man_grad' in kwargs: deriv = np.asarray(kwargs.get('man_grad')) From 52705d8fcdbf320c02ce69a52a1a0bd1d1d24db0 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 15:26:27 +0000 Subject: [PATCH 5/7] refactor: minor simplifications in derived_observable --- pyerrors/obs.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 60441456..a2deda6d 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1057,7 +1057,7 @@ def derived_observable(func, data, array_mode=False, **kwargs): if not all(isinstance(x, Obs) for x in raveled_data): for i in range(len(raveled_data)): if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_entry###") + raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") allcov = {} for o in raveled_data: @@ -1083,9 +1083,7 @@ def derived_observable(func, data, array_mode=False, **kwargs): new_values = func(values, **kwargs) - multi = 0 - if isinstance(new_values, np.ndarray): - multi = 1 + multi = int(isinstance(new_values, np.ndarray)) new_r_values = {} new_idl_d = {} @@ -1137,13 +1135,11 @@ def derived_observable(func, data, array_mode=False, **kwargs): if array_mode is True: - new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) - class _Zero_grad(): def __init__(self, N): - # self.grad = np.zeros(N) self.grad = np.zeros((N, 1)) + new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) d_extracted = {} g_extracted = {} for name in new_sample_names: From 2702b5519d0e3dc2124867a4771f3ffc9b6106bb Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 16:11:44 +0000 Subject: [PATCH 6/7] refactor: loop and if clause eliminated in Obs.__init__ --- pyerrors/obs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index a2deda6d..4247de8e 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -119,6 +119,7 @@ class Obs: for name, sample in sorted(zip(names, samples)): self.idl[name] = range(1, len(sample) + 1) + self._value = 0 if means is not None: for name, sample, mean in sorted(zip(names, samples, means)): self.shape[name] = len(self.idl[name]) @@ -126,6 +127,7 @@ class Obs: raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) self.r_values[name] = mean self.deltas[name] = sample + self.N = sum(list(self.shape.values())) else: for name, sample in sorted(zip(names, samples)): self.shape[name] = len(self.idl[name]) @@ -133,14 +135,12 @@ class Obs: raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) self.r_values[name] = np.mean(sample) self.deltas[name] = sample - self.r_values[name] - self.is_merged = {} - self.N = sum(list(self.shape.values())) - - self._value = 0 - if means is None: - for name in self.names: self._value += self.shape[name] * self.r_values[name] + self.N = sum(list(self.shape.values())) self._value /= self.N + + self.is_merged = {} + else: self._value = 0 self.is_merged = {} From ae53daa915d5c3675c1c81461fe6fd963ddf3468 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 8 Dec 2021 16:14:48 +0000 Subject: [PATCH 7/7] refactor: calculation of N in Obs.__init__ optimized --- pyerrors/obs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 4247de8e..c24da74a 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -120,23 +120,24 @@ class Obs: self.idl[name] = range(1, len(sample) + 1) self._value = 0 + self.N = 0 if means is not None: for name, sample, mean in sorted(zip(names, samples, means)): self.shape[name] = len(self.idl[name]) + self.N += self.shape[name] if len(sample) != self.shape[name]: raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) self.r_values[name] = mean self.deltas[name] = sample - self.N = sum(list(self.shape.values())) else: for name, sample in sorted(zip(names, samples)): self.shape[name] = len(self.idl[name]) + self.N += self.shape[name] if len(sample) != self.shape[name]: raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) self.r_values[name] = np.mean(sample) self.deltas[name] = sample - self.r_values[name] self._value += self.shape[name] * self.r_values[name] - self.N = sum(list(self.shape.values())) self._value /= self.N self.is_merged = {}