Bootstrap export/import (#198)

* feat: export_bootstrap method added. * feat: import_bootstrap function added. * tests: first test for import/export bootstrap added. * feat: bootstrap feature cleaned up. * docs: boostrap docstrings improved.
2025-07-13 07:09:27 +02:00 · 2023-07-14 13:12:11 +01:00 · 2023-07-14 13:12:11 +01:00 · b62a18643e
commit b62a18643e
parent 2cd076dbd7
2 changed files with 88 additions and 0 deletions
--- a/pyerrors/obs.py
+++ b/pyerrors/obs.py
@ -3,6 +3,7 @@ import hashlib
 import pickle
 import numpy as np
 import autograd.numpy as anp  # Thinly-wrapped numpy
+import scipy
 from autograd import jacobian
 import matplotlib.pyplot as plt
 from scipy.stats import skew, skewtest, kurtosis, kurtosistest
@ -684,6 +685,49 @@ class Obs:
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

+    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
+        """Export bootstrap samples from the Obs
+
+        Parameters
+        ----------
+        samples : int
+            Number of bootstrap samples to generate.
+        random_numbers : np.ndarray
+            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
+            If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
+        save_rng : str
+            Save the random numbers to a file if a path is specified.
+
+        Returns
+        -------
+        numpy.ndarray
+            Returns a numpy array of length N + 1 where N is the number of samples
+            for the given ensemble and replicum. The zeroth entry of the array contains
+            the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
+            derived from the Obs. The current implementation only works for observables
+            defined on exactly one ensemble and replicum. The derived bootstrap samples
+            should agree with samples from a full bootstrap analysis up to O(1/N).
+        """
+        if len(self.names) != 1:
+            raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
+
+        name = self.names[0]
+        length = self.N
+
+        if random_numbers is None:
+            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
+            rng = np.random.default_rng(seed)
+            random_numbers = rng.integers(0, length, size=(samples, length))
+
+        if save_rng is not None:
+            np.savetxt(save_rng, random_numbers, fmt='%i')
+
+        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
+        ret = np.zeros(samples + 1)
+        ret[0] = self.value
+        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
+        return ret
+
    def __float__(self):
        return float(self.value)

@ -1550,6 +1594,36 @@ def import_jackknife(jacks, name, idl=None):
    return new_obs


+def import_bootstrap(boots, name, random_numbers):
+    """Imports bootstrap samples and returns an Obs
+
+    Parameters
+    ----------
+    boots : numpy.ndarray
+        numpy array containing the mean value as zeroth entry and
+        the N bootstrap samples as first to Nth entry.
+    name : str
+        name of the ensemble the samples are defined on.
+    random_numbers : np.ndarray
+        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
+        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
+        chain to be reconstructed.
+    """
+    samples, length = random_numbers.shape
+    if samples != len(boots) - 1:
+        raise ValueError("Random numbers do not have the correct shape.")
+
+    if samples < length:
+        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
+
+    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
+
+    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
+    ret = Obs([samples], [name])
+    ret._value = boots[0]
+    return ret
+
+
 def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

--- a/tests/obs_test.py
+++ b/tests/obs_test.py
@ -1094,6 +1094,20 @@ def test_import_jackknife():
    assert my_obs == reconstructed_obs


+def test_import_bootstrap():
+    seed = 4321
+    samples = 1234
+    length = 820
+    name = "test"
+
+    rng = np.random.default_rng(seed)
+    random_numbers = rng.integers(0, length, size=(samples, length))
+    obs = pe.pseudo_Obs(2.447, 0.14, name, length)
+    boots = obs.export_bootstrap(1234, random_numbers=random_numbers)
+    re_obs = pe.import_bootstrap(boots, name, random_numbers=random_numbers)
+    assert obs == re_obs
+
+
 def test_reduce_deltas():
    idx_old = range(1, 101)
    deltas = [float(i) for i in idx_old]