diff --git a/pyerrors/obs.py b/pyerrors/obs.py index e32ca821..85fdf851 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -3,6 +3,7 @@ import hashlib import pickle import numpy as np import autograd.numpy as anp # Thinly-wrapped numpy +import scipy from autograd import jacobian import matplotlib.pyplot as plt from scipy.stats import skew, skewtest, kurtosis, kurtosistest @@ -684,6 +685,49 @@ class Obs: tmp_jacks[1:] = (n * mean - full_data) / (n - 1) return tmp_jacks + def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): + """Export bootstrap samples from the Obs + + Parameters + ---------- + samples : int + Number of bootstrap samples to generate. + random_numbers : np.ndarray + Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. + If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. + save_rng : str + Save the random numbers to a file if a path is specified. + + Returns + ------- + numpy.ndarray + Returns a numpy array of length N + 1 where N is the number of samples + for the given ensemble and replicum. The zeroth entry of the array contains + the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples + derived from the Obs. The current implementation only works for observables + defined on exactly one ensemble and replicum. The derived bootstrap samples + should agree with samples from a full bootstrap analysis up to O(1/N). + """ + if len(self.names) != 1: + raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") + + name = self.names[0] + length = self.N + + if random_numbers is None: + seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF + rng = np.random.default_rng(seed) + random_numbers = rng.integers(0, length, size=(samples, length)) + + if save_rng is not None: + np.savetxt(save_rng, random_numbers, fmt='%i') + + proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length + ret = np.zeros(samples + 1) + ret[0] = self.value + ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) + return ret + def __float__(self): return float(self.value) @@ -1550,6 +1594,36 @@ def import_jackknife(jacks, name, idl=None): return new_obs +def import_bootstrap(boots, name, random_numbers): + """Imports bootstrap samples and returns an Obs + + Parameters + ---------- + boots : numpy.ndarray + numpy array containing the mean value as zeroth entry and + the N bootstrap samples as first to Nth entry. + name : str + name of the ensemble the samples are defined on. + random_numbers : np.ndarray + Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, + where samples is the number of bootstrap samples and length is the length of the original Monte Carlo + chain to be reconstructed. + """ + samples, length = random_numbers.shape + if samples != len(boots) - 1: + raise ValueError("Random numbers do not have the correct shape.") + + if samples < length: + raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") + + proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length + + samples = scipy.linalg.lstsq(proj, boots[1:])[0] + ret = Obs([samples], [name]) + ret._value = boots[0] + return ret + + def merge_obs(list_of_obs): """Combine all observables in list_of_obs into one new observable diff --git a/tests/obs_test.py b/tests/obs_test.py index c8b741c2..ed538943 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -1094,6 +1094,20 @@ def test_import_jackknife(): assert my_obs == reconstructed_obs +def test_import_bootstrap(): + seed = 4321 + samples = 1234 + length = 820 + name = "test" + + rng = np.random.default_rng(seed) + random_numbers = rng.integers(0, length, size=(samples, length)) + obs = pe.pseudo_Obs(2.447, 0.14, name, length) + boots = obs.export_bootstrap(1234, random_numbers=random_numbers) + re_obs = pe.import_bootstrap(boots, name, random_numbers=random_numbers) + assert obs == re_obs + + def test_reduce_deltas(): idx_old = range(1, 101) deltas = [float(i) for i in idx_old]