Bootstrap export/import (#198)

* feat: export_bootstrap method added.

* feat: import_bootstrap function added.

* tests: first test for import/export bootstrap added.

* feat: bootstrap feature cleaned up.

* docs: boostrap docstrings improved.
This commit is contained in:
Fabian Joswig 2023-07-14 13:12:11 +01:00 committed by GitHub
parent 2cd076dbd7
commit b62a18643e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 88 additions and 0 deletions

View file

@ -3,6 +3,7 @@ import hashlib
import pickle
import numpy as np
import autograd.numpy as anp # Thinly-wrapped numpy
import scipy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
@ -684,6 +685,49 @@ class Obs:
tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
return tmp_jacks
def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
"""Export bootstrap samples from the Obs
Parameters
----------
samples : int
Number of bootstrap samples to generate.
random_numbers : np.ndarray
Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
save_rng : str
Save the random numbers to a file if a path is specified.
Returns
-------
numpy.ndarray
Returns a numpy array of length N + 1 where N is the number of samples
for the given ensemble and replicum. The zeroth entry of the array contains
the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples
derived from the Obs. The current implementation only works for observables
defined on exactly one ensemble and replicum. The derived bootstrap samples
should agree with samples from a full bootstrap analysis up to O(1/N).
"""
if len(self.names) != 1:
raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.")
name = self.names[0]
length = self.N
if random_numbers is None:
seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
rng = np.random.default_rng(seed)
random_numbers = rng.integers(0, length, size=(samples, length))
if save_rng is not None:
np.savetxt(save_rng, random_numbers, fmt='%i')
proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
ret = np.zeros(samples + 1)
ret[0] = self.value
ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
return ret
def __float__(self):
return float(self.value)
@ -1550,6 +1594,36 @@ def import_jackknife(jacks, name, idl=None):
return new_obs
def import_bootstrap(boots, name, random_numbers):
"""Imports bootstrap samples and returns an Obs
Parameters
----------
boots : numpy.ndarray
numpy array containing the mean value as zeroth entry and
the N bootstrap samples as first to Nth entry.
name : str
name of the ensemble the samples are defined on.
random_numbers : np.ndarray
Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
chain to be reconstructed.
"""
samples, length = random_numbers.shape
if samples != len(boots) - 1:
raise ValueError("Random numbers do not have the correct shape.")
if samples < length:
raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")
proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
samples = scipy.linalg.lstsq(proj, boots[1:])[0]
ret = Obs([samples], [name])
ret._value = boots[0]
return ret
def merge_obs(list_of_obs):
"""Combine all observables in list_of_obs into one new observable

View file

@ -1094,6 +1094,20 @@ def test_import_jackknife():
assert my_obs == reconstructed_obs
def test_import_bootstrap():
seed = 4321
samples = 1234
length = 820
name = "test"
rng = np.random.default_rng(seed)
random_numbers = rng.integers(0, length, size=(samples, length))
obs = pe.pseudo_Obs(2.447, 0.14, name, length)
boots = obs.export_bootstrap(1234, random_numbers=random_numbers)
re_obs = pe.import_bootstrap(boots, name, random_numbers=random_numbers)
assert obs == re_obs
def test_reduce_deltas():
idx_old = range(1, 101)
deltas = [float(i) for i in idx_old]