Bootstrap export/import (#198)

* feat: export_bootstrap method added. * feat: import_bootstrap function added. * tests: first test for import/export bootstrap added. * feat: bootstrap feature cleaned up. * docs: boostrap docstrings improved.
2025-06-30 00:39:27 +02:00 · 2023-07-14 13:12:11 +01:00 · 2023-07-14 13:12:11 +01:00 · b62a18643e
commit b62a18643e
parent 2cd076dbd7
2 changed files with 88 additions and 0 deletions
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@ -3,6 +3,7 @@ import hashlib
 import pickle
 import numpy as np
 import autograd.numpy as anp  # Thinly-wrapped numpy
 import scipy
 from autograd import jacobian
 import matplotlib.pyplot as plt
 from scipy.stats import skew, skewtest, kurtosis, kurtosistest
@ -684,6 +685,49 @@ class Obs:
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks
    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
        """Export bootstrap samples from the Obs
        Parameters
        ----------
        samples : int
            Number of bootstrap samples to generate.
        random_numbers : np.ndarray
            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
        save_rng : str
            Save the random numbers to a file if a path is specified.
        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived bootstrap samples
            should agree with samples from a full bootstrap analysis up to O(1/N).
        """
        if len(self.names) != 1:
            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
        name = self.names[0]
        length = self.N
        if random_numbers is None:
            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
            rng = np.random.default_rng(seed)
            random_numbers = rng.integers(0, length, size=(samples, length))
        if save_rng is not None:
            np.savetxt(save_rng, random_numbers, fmt='%i')
        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
        ret = np.zeros(samples + 1)
        ret[0] = self.value
        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
        return ret
    def __float__(self):
        return float(self.value)
@ -1550,6 +1594,36 @@ def import_jackknife(jacks, name, idl=None):
    return new_obs
 def import_bootstrap(boots, name, random_numbers):
    """Imports bootstrap samples and returns an Obs
    Parameters
    ----------
    boots : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N bootstrap samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    random_numbers : np.ndarray
        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
        chain to be reconstructed.
    """
    samples, length = random_numbers.shape
    if samples != len(boots) - 1:
        raise ValueError("Random numbers do not have the correct shape.")
    if samples < length:
        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
    ret = Obs([samples], [name])
    ret._value = boots[0]
    return ret
 def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable
--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@ -1094,6 +1094,20 @@ def test_import_jackknife():
    assert my_obs == reconstructed_obs
 def test_import_bootstrap():
    seed = 4321
    samples = 1234
    length = 820
    name = "test"
    rng = np.random.default_rng(seed)
    random_numbers = rng.integers(0, length, size=(samples, length))
    obs = pe.pseudo_Obs(2.447, 0.14, name, length)
    boots = obs.export_bootstrap(1234, random_numbers=random_numbers)
    re_obs = pe.import_bootstrap(boots, name, random_numbers=random_numbers)
    assert obs == re_obs
 def test_reduce_deltas():
    idx_old = range(1, 101)
    deltas = [float(i) for i in idx_old]