From b62a18643ed84e680810f66a0bb528969141ae66 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 14 Jul 2023 13:12:11 +0100 Subject: [PATCH 1/4] Bootstrap export/import (#198) * feat: export_bootstrap method added. * feat: import_bootstrap function added. * tests: first test for import/export bootstrap added. * feat: bootstrap feature cleaned up. * docs: boostrap docstrings improved. --- pyerrors/obs.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++ tests/obs_test.py | 14 +++++++++ 2 files changed, 88 insertions(+) diff --git a/pyerrors/obs.py b/pyerrors/obs.py index e32ca821..85fdf851 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -3,6 +3,7 @@ import hashlib import pickle import numpy as np import autograd.numpy as anp # Thinly-wrapped numpy +import scipy from autograd import jacobian import matplotlib.pyplot as plt from scipy.stats import skew, skewtest, kurtosis, kurtosistest @@ -684,6 +685,49 @@ class Obs: tmp_jacks[1:] = (n * mean - full_data) / (n - 1) return tmp_jacks + def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): + """Export bootstrap samples from the Obs + + Parameters + ---------- + samples : int + Number of bootstrap samples to generate. + random_numbers : np.ndarray + Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. + If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. + save_rng : str + Save the random numbers to a file if a path is specified. + + Returns + ------- + numpy.ndarray + Returns a numpy array of length N + 1 where N is the number of samples + for the given ensemble and replicum. The zeroth entry of the array contains + the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples + derived from the Obs. The current implementation only works for observables + defined on exactly one ensemble and replicum. The derived bootstrap samples + should agree with samples from a full bootstrap analysis up to O(1/N). + """ + if len(self.names) != 1: + raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") + + name = self.names[0] + length = self.N + + if random_numbers is None: + seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF + rng = np.random.default_rng(seed) + random_numbers = rng.integers(0, length, size=(samples, length)) + + if save_rng is not None: + np.savetxt(save_rng, random_numbers, fmt='%i') + + proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length + ret = np.zeros(samples + 1) + ret[0] = self.value + ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) + return ret + def __float__(self): return float(self.value) @@ -1550,6 +1594,36 @@ def import_jackknife(jacks, name, idl=None): return new_obs +def import_bootstrap(boots, name, random_numbers): + """Imports bootstrap samples and returns an Obs + + Parameters + ---------- + boots : numpy.ndarray + numpy array containing the mean value as zeroth entry and + the N bootstrap samples as first to Nth entry. + name : str + name of the ensemble the samples are defined on. + random_numbers : np.ndarray + Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, + where samples is the number of bootstrap samples and length is the length of the original Monte Carlo + chain to be reconstructed. + """ + samples, length = random_numbers.shape + if samples != len(boots) - 1: + raise ValueError("Random numbers do not have the correct shape.") + + if samples < length: + raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") + + proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length + + samples = scipy.linalg.lstsq(proj, boots[1:])[0] + ret = Obs([samples], [name]) + ret._value = boots[0] + return ret + + def merge_obs(list_of_obs): """Combine all observables in list_of_obs into one new observable diff --git a/tests/obs_test.py b/tests/obs_test.py index c8b741c2..ed538943 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -1094,6 +1094,20 @@ def test_import_jackknife(): assert my_obs == reconstructed_obs +def test_import_bootstrap(): + seed = 4321 + samples = 1234 + length = 820 + name = "test" + + rng = np.random.default_rng(seed) + random_numbers = rng.integers(0, length, size=(samples, length)) + obs = pe.pseudo_Obs(2.447, 0.14, name, length) + boots = obs.export_bootstrap(1234, random_numbers=random_numbers) + re_obs = pe.import_bootstrap(boots, name, random_numbers=random_numbers) + assert obs == re_obs + + def test_reduce_deltas(): idx_old = range(1, 101) deltas = [float(i) for i in idx_old] From 8736d1cd3cb76a188b78769f7b2bbc38ecc39081 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 14 Jul 2023 13:38:21 +0100 Subject: [PATCH 2/4] feat: CObs format added and complex Corr print improved. (#200) --- pyerrors/correlators.py | 4 +--- pyerrors/obs.py | 8 ++++++++ tests/obs_test.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pyerrors/correlators.py b/pyerrors/correlators.py index 8573cdbf..85e28f4d 100644 --- a/pyerrors/correlators.py +++ b/pyerrors/correlators.py @@ -998,8 +998,6 @@ class Corr: content_string += "Description: " + self.tag + "\n" if self.N != 1: return content_string - if isinstance(self[0], CObs): - return content_string if print_range[1]: print_range[1] += 1 @@ -1010,7 +1008,7 @@ class Corr: else: content_string += str(i + print_range[0]) for element in sub_corr: - content_string += '\t' + ' ' * int(element >= 0) + str(element) + content_string += f"\t{element:+2}" content_string += '\n' return content_string diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 85fdf851..a25dceb8 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -1023,6 +1023,14 @@ class CObs: def __repr__(self): return 'CObs[' + str(self) + ']' + def __format__(self, format_type): + if format_type == "": + significance = 2 + format_type = "2" + else: + significance = int(float(format_type.replace("+", "").replace("-", ""))) + return f"({self.real:{format_type}}{self.imag:+{significance}}j)" + def _format_uncertainty(value, dvalue, significance=2): """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" diff --git a/tests/obs_test.py b/tests/obs_test.py index ed538943..924d98cd 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -1297,6 +1297,16 @@ def test_f_string_obs(): print(f"{o1:-1}") print(f"{o1: 8}") +def test_f_string_cobs(): + o_real = pe.pseudo_Obs(0.348, 0.0123, "test") + o_imag = pe.pseudo_Obs(0.348, 0.0123, "test") + o1 = pe.CObs(o_real, o_imag) + print(f"{o1}") + print(f"{o1:3}") + print(f"{o1:+3}") + print(f"{o1:-1}") + print(f"{o1: 8}") + def test_compute_drho_fails(): obs = pe.input.json.load_json("tests/data/compute_drho_fails.json.gz") obs.gm() From 6dcd0c35182d26c93263191f255bb445c9667ba1 Mon Sep 17 00:00:00 2001 From: s-kuberski Date: Fri, 14 Jul 2023 15:21:59 +0200 Subject: [PATCH 3/4] feat: Added numerical integration of generic functions (#201) * feat: Added numerical integration of generic functions * refactored integration routines * tests: two trivial tests for integration added. * docs: quad docstring corrected. * Small bugfix for integration without obs --------- Co-authored-by: Fabian Joswig --- pyerrors/__init__.py | 1 + pyerrors/integrate.py | 87 +++++++++++++++++++++++++++++++++++++++++ tests/integrate_test.py | 51 ++++++++++++++++++++++++ tests/linalg_test.py | 4 +- 4 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 pyerrors/integrate.py create mode 100644 tests/integrate_test.py diff --git a/pyerrors/__init__.py b/pyerrors/__init__.py index df2c983a..ee259b24 100644 --- a/pyerrors/__init__.py +++ b/pyerrors/__init__.py @@ -485,5 +485,6 @@ from . import input from . import linalg from . import mpm from . import roots +from . import integrate from .version import __version__ diff --git a/pyerrors/integrate.py b/pyerrors/integrate.py new file mode 100644 index 00000000..3b48f3fb --- /dev/null +++ b/pyerrors/integrate.py @@ -0,0 +1,87 @@ +import numpy as np +from .obs import derived_observable, Obs +from autograd import jacobian +from scipy.integrate import quad as squad + + +def quad(func, p, a, b, **kwargs): + '''Performs a (one-dimensional) numeric integration of f(p, x) from a to b. + + The integration is performed using scipy.integrate.quad(). + All parameters that can be passed to scipy.integrate.quad may also be passed to this function. + The output is the same as for scipy.integrate.quad, the first element being an Obs. + + Parameters + ---------- + func : object + function to integrate, has to be of the form + + ```python + import autograd.numpy as anp + + def func(p, x): + return p[0] + p[1] * x + p[2] * anp.sinh(x) + ``` + where x is the integration variable. + p : list of floats or Obs + parameters of the function func. + a: float or Obs + Lower limit of integration (use -numpy.inf for -infinity). + b: float or Obs + Upper limit of integration (use -numpy.inf for -infinity). + All parameters of scipy.integrate.quad + + Returns + ------- + y : Obs + The integral of func from `a` to `b`. + abserr : float + An estimate of the absolute error in the result. + infodict : dict + A dictionary containing additional information. + Run scipy.integrate.quad_explain() for more information. + message + A convergence message. + explain + Appended only with 'cos' or 'sin' weighting and infinite + integration limits, it contains an explanation of the codes in + infodict['ierlst'] + ''' + + Np = len(p) + isobs = [True if isinstance(pi, Obs) else False for pi in p] + pval = np.array([p[i].value if isobs[i] else p[i] for i in range(Np)],) + pobs = [p[i] for i in range(Np) if isobs[i]] + + bounds = [a, b] + isobs_b = [True if isinstance(bi, Obs) else False for bi in bounds] + bval = np.array([bounds[i].value if isobs_b[i] else bounds[i] for i in range(2)]) + bobs = [bounds[i] for i in range(2) if isobs_b[i]] + bsign = [-1, 1] + + ifunc = np.vectorize(lambda x: func(pval, x)) + + intpars = squad.__code__.co_varnames[3:3 + len(squad.__defaults__)] + ikwargs = {k: kwargs[k] for k in intpars if k in kwargs} + + integration_result = squad(ifunc, bval[0], bval[1], **ikwargs) + val = integration_result[0] + + jac = jacobian(func) + + derivint = [] + for i in range(Np): + if isobs[i]: + ifunc = np.vectorize(lambda x: jac(pval, x)[i]) + derivint.append(squad(ifunc, bounds[0], bounds[1], **ikwargs)[0]) + + for i in range(2): + if isobs_b[i]: + derivint.append(bsign[i] * func(pval, bval[i])) + + if len(derivint) == 0: + return integration_result + + res = derived_observable(lambda x, **kwargs: 0 * (x[0] + np.finfo(np.float64).eps) * (pval[0] + np.finfo(np.float64).eps) + val, pobs + bobs, man_grad=derivint) + + return (res, *integration_result[1:]) diff --git a/tests/integrate_test.py b/tests/integrate_test.py new file mode 100644 index 00000000..07a4313b --- /dev/null +++ b/tests/integrate_test.py @@ -0,0 +1,51 @@ +import numpy as np +import autograd.numpy as anp +import pyerrors as pe + + +def test_integration(): + def f(p, x): + return p[0] * x + p[1] * x**2 - p[2] / x + + def F(p, x): + return p[0] * x**2 / 2. + p[1] * x**3 / 3. - anp.log(x) * p[2] + + def check_ana_vs_int(p, l, u, **kwargs): + numint_full = pe.integrate.quad(f, p, l, u, **kwargs) + numint = numint_full[0] + + anaint = F(p, u) - F(p, l) + diff = (numint - anaint) + + if isinstance(numint, pe.Obs): + numint.gm() + anaint.gm() + + assert(diff.is_zero()) + else: + assert(np.isclose(0, diff)) + + pobs = np.array([pe.cov_Obs(1., .1**2, '0'), pe.cov_Obs(2., .2**2, '1'), pe.cov_Obs(2.2, .17**2, '2')]) + lobs = pe.cov_Obs(.123, .012**2, 'l') + uobs = pe.cov_Obs(1., .05**2, 'u') + + check_ana_vs_int(pobs, lobs, uobs) + check_ana_vs_int(pobs, lobs.value, uobs) + check_ana_vs_int(pobs, lobs, uobs.value) + check_ana_vs_int(pobs, lobs.value, uobs.value) + for i in range(len(pobs)): + p = [pi for pi in pobs] + p[i] = pobs[i].value + check_ana_vs_int(p, lobs, uobs) + + check_ana_vs_int([pi.value for pi in pobs], lobs, uobs) + check_ana_vs_int([pi.value for pi in pobs], lobs.value, uobs.value) + + check_ana_vs_int(pobs, lobs, uobs, epsabs=1.e-9, epsrel=1.236e-10, limit=100) + assert(len(pe.integrate.quad(f, pobs, lobs, uobs, full_output=True)) > 2) + + r1, _ = pe.integrate.quad(F, pobs, 1, 0.1) + r2, _ = pe.integrate.quad(F, pobs, 0.1, 1) + assert r1 == -r2 + iamzero, _ = pe.integrate.quad(F, pobs, 1, 1) + assert iamzero == 0 diff --git a/tests/linalg_test.py b/tests/linalg_test.py index babf3797..ec0591ba 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -14,9 +14,9 @@ def get_real_matrix(dimension): exponent_imag = np.random.normal(0, 1) base_matrix[n, m] = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t']) - return base_matrix + def get_complex_matrix(dimension): base_matrix = np.empty((dimension, dimension), dtype=object) for (n, m), entry in np.ndenumerate(base_matrix): @@ -109,7 +109,6 @@ def test_einsum(): assert np.all([o.imag.is_zero_within_error(0.001) for o in arr]) assert np.all([o.imag.dvalue < 0.001 for o in arr]) - tt = [get_real_matrix(4), get_real_matrix(3)] q = np.tensordot(tt[0], tt[1], 0) c1 = tt[1] @ q @@ -355,3 +354,4 @@ def test_complex_matrix_real_entries(): my_mat[0, 1] = 4 my_mat[2, 0] = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t']) assert np.all((my_mat @ pe.linalg.inv(my_mat) - np.identity(4)) == 0) + From 5c2a6de56f013c977dbe8e466aa5243f6d52e7c0 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Fri, 14 Jul 2023 14:26:54 +0100 Subject: [PATCH 4/4] fix: explicit Exception for combined fit constant edge case. (#202) --- pyerrors/fits.py | 6 ++++++ tests/fits_test.py | 17 +++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/pyerrors/fits.py b/pyerrors/fits.py index 04051ffd..5ec857e0 100644 --- a/pyerrors/fits.py +++ b/pyerrors/fits.py @@ -230,6 +230,12 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs): n_parms_ls.append(n_loc) n_parms = max(n_parms_ls) + + if len(key_ls) > 1: + for key in key_ls: + if np.asarray(yd[key]).shape != funcd[key](np.arange(n_parms), xd[key]).shape: + raise ValueError(f"Fit function {key} returns the wrong shape ({funcd[key](np.arange(n_parms), xd[key]).shape} instead of {xd[key].shape})\nIf the fit function is just a constant you could try adding x*0 to get the correct shape.") + if not silent: print('Fit with', n_parms, 'parameter' + 's' * (n_parms > 1)) diff --git a/tests/fits_test.py b/tests/fits_test.py index 48e788bd..80b6de5a 100644 --- a/tests/fits_test.py +++ b/tests/fits_test.py @@ -1143,6 +1143,23 @@ def test_fit_dof(): assert np.all(np.array(cd[1:]) > 0) +def test_combined_fit_constant_shape(): + N1 = 16 + N2 = 10 + x = {"a": np.arange(N1), + "": np.arange(N2)} + y = {"a": [pe.pseudo_Obs(o + np.random.normal(0.0, 0.1), 0.1, "test") for o in range(N1)], + "": [pe.pseudo_Obs(o + np.random.normal(0.0, 0.1), 0.1, "test") for o in range(N2)]} + funcs = {"a": lambda a, x: a[0] + a[1] * x, + "": lambda a, x: a[1]} + with pytest.raises(ValueError): + pe.fits.least_squares(x, y, funcs, method='migrad') + + funcs = {"a": lambda a, x: a[0] + a[1] * x, + "": lambda a, x: a[1] + x * 0} + pe.fits.least_squares(x, y, funcs, method='migrad') + + def fit_general(x, y, func, silent=False, **kwargs): """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.