diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index e75a5ae1..efb81565 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -21,6 +21,6 @@ jobs: - name: flake8 Lint uses: py-actions/flake8@v1 with: - ignore: "E501,E722" + ignore: "E501" exclude: "__init__.py, input/__init__.py" path: "pyerrors" diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 3c30013f..0f3ae2f3 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -6,6 +6,8 @@ on: - master - develop pull_request: + schedule: + - cron: '0 4 1 * *' jobs: pytest: @@ -13,7 +15,7 @@ jobs: strategy: fail-fast: true matrix: - python-version: ["3.6", "3.7", "3.8", "3.9"] + python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"] steps: - name: Checkout source @@ -27,6 +29,7 @@ jobs: - name: Install run: | python -m pip install --upgrade pip + pip install wheel pip install . pip install pytest pip install pytest-cov diff --git a/.gitignore b/.gitignore index 77feb98c..fdaac56f 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ examples/B1k2_pcac_plateau.p examples/Untitled.* core.* *.swp +htmlcov diff --git a/CHANGELOG.md b/CHANGELOG.md index 92681a4a..73d9eab9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,20 +4,33 @@ All notable changes to this project will be documented in this file. ## [2.0.0] - 2021-??-?? ### Added -- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation -- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int` -- `Obs` objects now have methods `is_zero` and `is_zero_within_error` +- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation. +- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int`. +- The possibility to work with Monte Carlo histories which are evenly or unevenly spaced was added. +- The Corr class now has additional methods like `reverse`, `T_symmetry`, `correlate` and `reweight`. +- `linalg` module now has explicit functions `inv` and `cholesky`. +- `Obs` objects now have methods `is_zero` and `is_zero_within_error` as well as overloaded comparison operations. +- Functions to convert Obs data to or from jackknife samples were added. +- Alternative matrix multiplication routine `jack_matmul` was added to `linalg` module which makes use of the jackknife approximation and is much faster for large matrices. +- Additional input routines for npr data added to `input.hadrons`. +- Version number added. ### Changed -- Additional attributes can no longer be added to existing `Obs`. This makes it no longer possible to import `Obs` created with previous versions of pyerrors -- The default value for `Corr.prange` is now `None` -- The `input` module was restructured to contain one submodule per data source +- The internal bookkeeping system for ensembles/replica was changed. The separator for replica is now `|`. +- The fit functions were renamed to `least_squares` and `total_least_squares`. +- The fit functions can now deal with provided covariance matrices. +- The convention for the fit range in the Corr class has been changed. +- Obs.print was renamed to Obs.details and the output was improved. +- The default value for `Corr.prange` is now `None`.
+- The `input` module was restructured to contain one submodule per data source. +- Performance of Obs.__init__ improved. ### Deprecated - The function `plot_corrs` was deprecated as all its functionality is now contained within `Corr.show` - The kwarg `bias_correction` in `derived_observable` was removed - Obs no longer have an attribute `e_Q` - Removed `fits.fit_exp` +- Removed jackknife module ## [1.1.0] - 2021-10-11 ### Added @@ -77,7 +90,7 @@ All notable changes to this project will be documented in this file. ## [0.7.0] - 2020-03-10 ### Added -- New fit funtions for fitting with and without x-errors added which use automatic differentiation and should be more reliable than the old ones. +- New fit functions for fitting with and without x-errors added which use automatic differentiation and should be more reliable than the old ones. - Fitting with Bayesian priors added. - New functions for visualization of fits which can be activated via the kwargs resplot and qqplot. - chisquare/expected_chisquared which takes into account correlations in the data and non-linearities in the fit function can now be activated with the kwarg expected_chisquare. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a6705807..cd627bd2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,25 +9,27 @@ and create your own branch ``` cd pyerrors git checkout -b feature/my_feature ``` -I find it convenient to install the package in editable mode in the local python environment +It can be convenient to install the package in editable mode in the local python environment when developing new features ``` pip install -e . ``` Please send any pull requests to the `develop` branch. + ### Documentation -Please add docstrings to any new function, class or method you implement. +Please add docstrings to any new function, class or method you implement. The documentation is automatically generated from these docstrings. The startpage of the documentation is generated from the docstring of `pyerrors/__init__.py`. ### Tests When implementing a new feature or fixing a bug please add meaningful tests to the files in the `tests` directory which cover the new code. - -### Continous integration For all pull requests tests are executed for the most recent python releases via ``` -pytest --cov=pyerrors -v +pytest --cov=pyerrors -vv ``` -requiring `pytest`, `pytest-cov` and `pytest-benchmark` -and the linter `flake8` is executed with the command +requiring `pytest`, `pytest-cov` and `pytest-benchmark`. To get a coverage report in html run ``` -flake8 --ignore=E501,E722 --exclude=__init__.py pyerrors +pytest --cov=pyerrors --cov-report html +``` +The linter `flake8` is executed with the command +``` +flake8 --ignore=E501 --exclude=__init__.py pyerrors ``` Please make sure that all tests pass for new pull requests. diff --git a/README.md b/README.md index 4e4a0a9d..773970cb 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ `pyerrors` is a python package for error computation and propagation of Markov chain Monte Carlo data.
- **Documentation:** https://fjosw.github.io/pyerrors/pyerrors.html -- **Examples**: https://github.com/fjosw/pyerrors/tree/develop/examples +- **Examples**: https://github.com/fjosw/pyerrors/tree/develop/examples (Do not work properly at the moment) - **Contributing:** https://github.com/fjosw/pyerrors/blob/develop/CONTRIBUTING.md - **Bug reports:** https://github.com/fjosw/pyerrors/issues @@ -16,9 +16,3 @@ to install the most recent release run ```bash pip install git+https://github.com/fjosw/pyerrors.git@master ``` - -## Other implementations There exist similar publicly available implementations of gamma method error analysis suites in -- [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors) -- [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl) -- [Python](https://github.com/mbruno46/pyobs) diff --git a/pyerrors/__init__.py b/pyerrors/__init__.py index 2f194514..9c427818 100644 --- a/pyerrors/__init__.py +++ b/pyerrors/__init__.py @@ -1,29 +1,26 @@ r''' # What is pyerrors? `pyerrors` is a python package for error computation and propagation of Markov chain Monte Carlo data. -It is based on the **gamma method** [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). Some of its features are: -- **automatic differentiation** as suggested in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) (partly based on the [autograd](https://github.com/HIPS/autograd) package) -- **treatment of slow modes** in the simulation as suggested in [arXiv:1009.5228](https://arxiv.org/abs/1009.5228) -- coherent **error propagation** for data from **different Markov chains** -- **non-linear fits with x- and y-errors** and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) -- **real and complex matrix operations** and their error propagation based on automatic differentiation (Cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...) +It is based on the gamma method [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). Some of its features are: +- automatic differentiation for exact linear error propagation as suggested in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) (partly based on the [autograd](https://github.com/HIPS/autograd) package). +- treatment of slow modes in the simulation as suggested in [arXiv:1009.5228](https://arxiv.org/abs/1009.5228). +- coherent error propagation for data from different Markov chains. +- non-linear fits with x- and y-errors and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289). +- real and complex matrix operations and their error propagation based on automatic differentiation (Matrix inverse, Cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...). -## Getting started +There exist similar publicly available implementations of gamma method error analysis suites in [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors), [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl) and [Python](https://github.com/mbruno46/pyobs).
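A minimal sketch of the matrix functionality listed above (the sample histories and the 2x2 layout are made up for illustration; `linalg.inv` and `linalg.cholesky` are the explicit routines introduced in this release):

```python
import numpy as np
import pyerrors as pe

# Made-up Monte Carlo histories for a symmetric 2x2 matrix
obs11 = pe.Obs([np.random.normal(4.0, 0.2, 1000)], ['ensemble_name'])
obs12 = pe.Obs([np.random.normal(1.0, 0.1, 1000)], ['ensemble_name'])

# Entries may mix Obs with plain floats or ints
mat = np.asarray([[obs11, obs12], [obs12, 3.0]])

inv = pe.linalg.inv(mat)        # matrix inverse with error propagation
chol = pe.linalg.cholesky(mat)  # Cholesky decomposition of the same matrix
```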
+ +## Basic example ```python import numpy as np import pyerrors as pe -my_obs = pe.Obs([samples], ['ensemble_name']) -my_new_obs = 2 * np.log(my_obs) / my_obs ** 2 -my_new_obs.gamma_method() -print(my_new_obs) +my_obs = pe.Obs([samples], ['ensemble_name']) # Initialize an Obs object +my_new_obs = 2 * np.log(my_obs) / my_obs ** 2 # Construct derived Obs object +my_new_obs.gamma_method() # Estimate the statistical error +print(my_new_obs) # Print the result to stdout > 0.31498(72) - -iamzero = my_new_obs - my_new_obs -iamzero.gamma_method() -print(iamzero == 0.0) -> True ``` # The `Obs` class @@ -33,7 +30,6 @@ An `Obs` object can be initialized with two arguments, the first is a list conta The samples can either be provided as python list or as numpy array. The second argument is a list containing the names of the respective Monte Carlo chains as strings. These strings uniquely identify a Monte Carlo chain/ensemble. -Example: ```python import pyerrors as pe @@ -43,14 +39,12 @@ my_obs = pe.Obs([samples], ['ensemble_name']) ## Error propagation When performing mathematical operations on `Obs` objects the correct error propagation is intrinsically taken care using a first order Taylor expansion -$$\delta_f^i=\sum_\alpha \bar{f}_\alpha \delta_\alpha^i\,,\quad \delta_\alpha^i=a_\alpha^i-\bar{a}_\alpha$$ +$$\delta_f^i=\sum_\alpha \bar{f}_\alpha \delta_\alpha^i\,,\quad \delta_\alpha^i=a_\alpha^i-\bar{a}_\alpha\,,$$ as introduced in [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). - The required derivatives $\bar{f}_\alpha$ are evaluated up to machine precision via automatic differentiation as suggested in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289). The `Obs` class is designed such that mathematical numpy functions can be used on `Obs` just as for regular floats. -Example: ```python import numpy as np import pyerrors as pe @@ -61,6 +55,11 @@ my_obs2 = pe.Obs([samples2], ['ensemble_name']) my_sum = my_obs1 + my_obs2 my_m_eff = np.log(my_obs1 / my_obs2) + +iamzero = my_m_eff - my_m_eff +# Check that value and fluctuations are zero within machine precision +print(iamzero == 0.0) +> True ``` ## Error estimation @@ -68,7 +67,6 @@ my_m_eff = np.log(my_obs1 / my_obs2) The error estimation within `pyerrors` is based on the gamma method introduced in [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). After having arrived at the derived quantity of interest the `gamma_method` can be called as detailed in the following example. -Example: ```python my_sum.gamma_method() print(my_sum) @@ -81,9 +79,11 @@ my_sum.details() ``` -The standard value for the automatic windowing procedure is $S=2$. Other values for $S$ can be passed to the `gamma_method` as parameter. +We use the following definition of the integrated autocorrelation time established in [Madras & Sokal 1988](https://link.springer.com/article/10.1007/BF01022990) +$$\tau_\mathrm{int}=\frac{1}{2}+\sum_{t=1}^{W}\rho(t)\geq \frac{1}{2}\,.$$ +The window $W$ is determined via the automatic windowing procedure described in [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017). +The standard value for the parameter $S$ of this automatic windowing procedure is $S=2$. Other values for $S$ can be passed to the `gamma_method` as parameter. 
-Example: ```python my_sum.gamma_method(S=3.0) my_sum.details() @@ -94,19 +94,15 @@ my_sum.details() ``` -The integrated autocorrelation time $\tau_\mathrm{int}$ and the autocorrelation function $\rho(W)$ can be monitored via the methods ´pyerrors.obs.Obs.plot_tauint` and ´pyerrors.obs.Obs.plot_tauint`. +The integrated autocorrelation time $\tau_\mathrm{int}$ and the autocorrelation function $\rho(W)$ can be monitored via the methods `pyerrors.obs.Obs.plot_tauint` and `pyerrors.obs.Obs.plot_rho`. -Example: -```python -my_sum.plot_tauint() -my_sum.plot_rho() -``` +If the parameter $S$ is set to zero it is assumed that the dataset does not exhibit any autocorrelation and the window size is chosen to be zero. +In this case the error estimate is identical to the sample standard error. ### Exponential tails Slow modes in the Monte Carlo history can be accounted for by attaching an exponential tail to the autocorrelation function $\rho$ as suggested in [arXiv:1009.5228](https://arxiv.org/abs/1009.5228). The longest autocorrelation time in the history, $\tau_\mathrm{exp}$, can be passed to the `gamma_method` as parameter. In this case the automatic windowing procedure is vacated and the parameter $S$ does not affect the error estimate. -Example: ```python my_sum.gamma_method(tau_exp=7.2) my_sum.details() @@ -116,20 +112,19 @@ my_sum.details() > · Ensemble 'ensemble_name' : 1000 configurations (from 1 to 1000) ``` -For the full API see `pyerrors.obs.Obs.gamma_method` +For the full API see `pyerrors.obs.Obs.gamma_method`. ## Multiple ensembles/replica Error propagation for multiple ensembles (Markov chains with different simulation parameters) is handled automatically. Ensembles are uniquely identified by their `name`. -Example: ```python obs1 = pe.Obs([samples1], ['ensemble1']) obs2 = pe.Obs([samples2], ['ensemble2']) my_sum = obs1 + obs2 my_sum.details() -> Result 2.00697958e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) +> Result 2.00697958e+00 > 1500 samples in 2 ensembles: > · Ensemble 'ensemble1' : 1000 configurations (from 1 to 1000) > · Ensemble 'ensemble2' : 500 configurations (from 1 to 500) @@ -137,14 +132,13 @@ my_sum.details() `pyerrors` identifies multiple replica (independent Markov chains with identical simulation parameters) by the vertical bar `|` in the name of the data set. -Example: ```python obs1 = pe.Obs([samples1], ['ensemble1|r01']) obs2 = pe.Obs([samples2], ['ensemble1|r02']) > my_sum = obs1 + obs2 > my_sum.details() -> Result 2.00697958e+00 +/- 0.00000000e+00 +/- 0.00000000e+00 (0.000%) +> Result 2.00697958e+00 > 1500 samples in 1 ensemble: > · Ensemble 'ensemble1' > · Replicum 'r01' : 1000 configurations (from 1 to 1000) @@ -155,7 +149,6 @@ obs2 = pe.Obs([samples2], ['ensemble1|r02']) In order to keep track of different error analysis parameters for different ensembles one can make use of global dictionaries as detailed in the following example. -Example: ```python pe.Obs.S_dict['ensemble1'] = 2.5 pe.Obs.tau_exp_dict['ensemble2'] = 8.0 @@ -170,32 +163,43 @@ Passing arguments to the `gamma_method` still dominates over the dictionaries. ## Irregular Monte Carlo chains Irregular Monte Carlo chains can be initialized with the parameter `idl`.
-Example: ```python # Observable defined on configurations 20 to 519 obs1 = pe.Obs([samples1], ['ensemble1'], idl=[range(20, 520)]) +obs1.details() +> Result 9.98319881e-01 +> 500 samples in 1 ensemble: +> · Ensemble 'ensemble1' : 500 configurations (from 20 to 519) # Observable defined on every second configuration between 5 and 1003 obs2 = pe.Obs([samples2], ['ensemble1'], idl=[range(5, 1005, 2)]) +obs2.details() +> Result 9.99100712e-01 +> 500 samples in 1 ensemble: +> · Ensemble 'ensemble1' : 500 configurations (from 5 to 1003 in steps of 2) # Observable defined on configurations 2, 9, 28, 29 and 501 obs3 = pe.Obs([samples3], ['ensemble1'], idl=[[2, 9, 28, 29, 501]]) +obs3.details() +> Result 1.01718064e+00 +> 5 samples in 1 ensemble: +> · Ensemble 'ensemble1' : 5 configurations (irregular range) + ``` **Warning:** Irregular Monte Carlo chains can result in odd patterns in the autocorrelation functions. Make sure to check the autocorrelation time with e.g. `pyerrors.obs.Obs.plot_rho` or `pyerrors.obs.Obs.plot_tauint`. -For the full API see `pyerrors.obs.Obs` +For the full API see `pyerrors.obs.Obs`. # Correlators -For the full API see `pyerrors.correlators.Corr` +For the full API see `pyerrors.correlators.Corr`. # Complex observables `pyerrors` can handle complex valued observables via the class `pyerrors.obs.CObs`. `CObs` are initialized with a real and an imaginary part which both can be `Obs` valued. -Example: ```python my_real_part = pe.Obs([samples1], ['ensemble1']) my_imag_part = pe.Obs([samples2], ['ensemble1']) @@ -225,8 +229,8 @@ print(my_derived_cobs) The preferred exported file format within `pyerrors` is ## Jackknife samples -For comparison with other analysis workflows `pyerrors` can generate jackknife samples from an `Obs` object. -See `pyerrors.obs.Obs.export_jackknife` for details. +For comparison with other analysis workflows `pyerrors` can generate jackknife samples from an `Obs` object or import jackknife samples into an `Obs` object. +See `pyerrors.obs.Obs.export_jackknife` and `pyerrors.obs.import_jackknife` for details. # Input `pyerrors.input` @@ -234,12 +238,11 @@ See `pyerrors.obs.Obs.export_jackknife` for details. from .obs import * from .correlators import * from .fits import * +from .misc import * from . import dirac from . import input from . import linalg -from . import misc from . import mpm -from . import npr from . 
import roots from .version import __version__ diff --git a/pyerrors/correlators.py b/pyerrors/correlators.py index 7e333cdb..90dcf193 100644 --- a/pyerrors/correlators.py +++ b/pyerrors/correlators.py @@ -3,7 +3,8 @@ import numpy as np import autograd.numpy as anp import matplotlib.pyplot as plt import scipy.linalg -from .obs import Obs, dump_object, reweight, correlate +from .obs import Obs, reweight, correlate +from .misc import dump_object from .fits import least_squares from .linalg import eigh, inv, cholesky from .roots import find_root @@ -442,7 +443,7 @@ class Corr: if self.prange: fitrange = self.prange else: - fitrange = [0, self.T] + fitrange = [0, self.T - 1] xs = [x for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None] ys = [self.content[x][0] for x in range(fitrange[0], fitrange[1] + 1) if not self.content[x] is None] @@ -535,7 +536,7 @@ class Corr: y_min = min([(x[0].value - x[0].dvalue) for x in self.content[x_range[0]: x_range[1] + 1] if (x is not None) and x[0].dvalue < 2 * np.abs(x[0].value)]) y_max = max([(x[0].value + x[0].dvalue) for x in self.content[x_range[0]: x_range[1] + 1] if (x is not None) and x[0].dvalue < 2 * np.abs(x[0].value)]) ax1.set_ylim([y_min - 0.1 * (y_max - y_min), y_max + 0.1 * (y_max - y_min)]) - except: + except Exception: pass else: ax1.set_ylim(y_range) diff --git a/pyerrors/covobs.py b/pyerrors/covobs.py new file mode 100644 index 00000000..2c8f81fc --- /dev/null +++ b/pyerrors/covobs.py @@ -0,0 +1,75 @@ +import numpy as np + + +class Covobs: + + def __init__(self, mean, cov, name, pos=None, grad=None): + """ Initialize Covobs object. + + Parameters + ---------- + mean : float + Mean value of the new Obs + cov : list or array + 2d Covariance matrix or 1d diagonal entries + name : str + identifier for the covariance matrix + pos : int + Position of the variance belonging to mean in cov. + Is taken to be 1 if cov is 0-dimensional + grad : list or array + Gradient of the Covobs wrt. the means belonging to cov. + """ + self._set_cov(cov) + if '|' in name: + raise Exception("Covobs name must not contain replica separator '|'.") + self.name = name + if grad is None: + if pos is None: + if self.N == 1: + pos = 0 + else: + raise Exception('Have to specify position of cov-element belonging to mean!') + else: + if pos > self.N: + raise Exception('pos %d too large for covariance matrix with dimension %dx%d!' % (pos, self.N, self.N)) + self._grad = np.zeros((self.N, 1)) + self._grad[pos] = 1. 
+ else: + self._set_grad(grad) + self.value = mean + + def errsq(self): + """ Return the variance (= square of the error) of the Covobs + """ + return float(np.dot(np.transpose(self.grad), np.dot(self.cov, self.grad))) + + def _set_cov(self, cov): + self._cov = np.array(cov) + if self._cov.ndim == 0: + self.N = 1 + self._cov = np.diag([self._cov]) + elif self._cov.ndim == 1: + self.N = len(self._cov) + self._cov = np.diag(self._cov) + elif self._cov.ndim == 2: + self.N = self._cov.shape[0] + if self._cov.shape[1] != self.N: + raise Exception('Covariance matrix has to be a square matrix!') + else: + raise Exception('Covariance matrix has to be a 2 dimensional square matrix!') + + def _set_grad(self, grad): + self._grad = np.array(grad) + if self._grad.ndim in [0, 1]: + self._grad = np.reshape(self._grad, (self.N, 1)) + elif self._grad.ndim != 2: + raise Exception('Invalid dimension of grad!') + + @property + def cov(self): + return self._cov + + @property + def grad(self): + return self._grad diff --git a/pyerrors/fits.py b/pyerrors/fits.py index 291a5568..d446e72e 100644 --- a/pyerrors/fits.py +++ b/pyerrors/fits.py @@ -1,4 +1,3 @@ -import gc from collections.abc import Sequence import warnings import numpy as np @@ -11,7 +10,7 @@ from scipy.odr import ODR, Model, RealData import iminuit from autograd import jacobian from autograd import elementwise_grad as egrad -from .obs import Obs, derived_observable, covariance, pseudo_Obs +from .obs import Obs, derived_observable, covariance, cov_Obs class Fit_result(Sequence): @@ -88,8 +87,6 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs): priors has to be a list with an entry for every parameter in the fit. The entries can either be Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like 0.548(23), 500(40) or 0.5(0.4) - It is important for the subsequent error estimation that the e_tag for the gamma method is large - enough. silent : bool, optional If true all output to the console is omitted (default False). initial_guess : list @@ -109,6 +106,11 @@ def least_squares(x, y, func, priors=None, silent=False, **kwargs): corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False). + correlated_fit : bool + If true, use the full correlation matrix in the definition of the chisquare + (only works for prior==None and when no method is given, at the moment). + const_par : list, optional + List of N Obs that are used to constrain the last N fit parameters of func. ''' if priors is not None: return _prior_fit(x, y, func, priors, silent=silent, **kwargs) @@ -154,6 +156,8 @@ def total_least_squares(x, y, func, silent=False, **kwargs): corrected by effects caused by correlated input data. This can take a while as the full correlation matrix has to be calculated (default False). + const_par : list, optional + List of N Obs that are used to constrain the last N fit parameters of func. 
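A hypothetical usage sketch for the two new keyword arguments (the data, fit function and constrained value below are invented; `const_par` is accepted by both `least_squares` and `total_least_squares`):

```python
import numpy as np
import pyerrors as pe

def func(a, x):
    return a[0] + a[1] * x

x = np.arange(1, 8)
y = [pe.pseudo_Obs(0.9 + 0.5 * xi, 0.05, 'ens1') for xi in x]
[o.gamma_method() for o in y]

# Constrain the last fit parameter to a previously determined Obs;
# only a[0] is floated in the minimization
slope = pe.pseudo_Obs(0.5, 0.01, 'ens1')
fit_const = pe.fits.least_squares(x, y, func, const_par=[slope])

# Use the full correlation matrix of y in the chisquare definition
# instead of the diagonal approximation
fit_corr = pe.fits.least_squares(x, y, func, correlated_fit=True)
```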
Based on the orthogonal distance regression module of scipy ''' @@ -169,10 +173,21 @@ def total_least_squares(x, y, func, silent=False, **kwargs): if not callable(func): raise TypeError('func has to be a function.') + func_aug = func + if 'const_par' in kwargs: + const_par = kwargs['const_par'] + if isinstance(const_par, Obs): + const_par = [const_par] + + def func(p, x): + return func_aug(np.concatenate((p, [o.value for o in const_par])), x) + else: + const_par = [] + for i in range(25): try: func(np.arange(i), x.T[0]) - except: + except Exception: pass else: break @@ -180,6 +195,8 @@ def total_least_squares(x, y, func, silent=False, **kwargs): n_parms = i if not silent: print('Fit with', n_parms, 'parameters') + if(len(const_par) > 0): + print('\t and %d constrained parameter%s' % (len(const_par), 's' if len(const_par) > 1 else ''), const_par) x_f = np.vectorize(lambda o: o.value)(x) dx_f = np.vectorize(lambda o: o.dvalue)(x) @@ -195,7 +212,7 @@ def total_least_squares(x, y, func, silent=False, **kwargs): if 'initial_guess' in kwargs: x0 = kwargs.get('initial_guess') if len(x0) != n_parms: - raise Exception('Initial guess does not have the correct length.') + raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms)) else: x0 = [1] * n_parms @@ -222,12 +239,18 @@ def total_least_squares(x, y, func, silent=False, **kwargs): raise Exception('The minimization procedure did not converge.') m = x_f.size + n_parms_aug = n_parms + len(const_par) def odr_chisquare(p): model = func(p[:n_parms], p[n_parms:].reshape(x_shape)) chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2) return chisq + def odr_chisquare_aug(p): + model = func_aug(np.concatenate((p[:n_parms_aug], [o.value for o in const_par])), p[n_parms_aug:].reshape(x_shape)) + chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms_aug:].reshape(x_shape)) / dx_f) ** 2) + return chisq + if kwargs.get('expected_chisquare') is True: W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel())))) @@ -254,31 +277,32 @@ def total_least_squares(x, y, func, silent=False, **kwargs): print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare) - hess_inv = np.linalg.pinv(jacobian(jacobian(odr_chisquare))(np.concatenate((out.beta, out.xplus.ravel())))) + fitp = np.concatenate((out.beta, [o.value for o in const_par])) + hess_inv = np.linalg.pinv(jacobian(jacobian(odr_chisquare_aug))(np.concatenate((fitp, out.xplus.ravel())))) def odr_chisquare_compact_x(d): - model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) - chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) + model = func_aug(d[:n_parms_aug], d[n_parms_aug:n_parms_aug + m].reshape(x_shape)) + chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms_aug + m:].reshape(x_shape) - d[n_parms_aug:n_parms_aug + m].reshape(x_shape)) / dx_f) ** 2) return chisq - jac_jac_x = jacobian(jacobian(odr_chisquare_compact_x))(np.concatenate((out.beta, out.xplus.ravel(), x_f.ravel()))) + jac_jac_x = jacobian(jacobian(odr_chisquare_compact_x))(np.concatenate((fitp, out.xplus.ravel(), x_f.ravel()))) - deriv_x = -hess_inv @ jac_jac_x[:n_parms + m, n_parms + m:] + deriv_x = -hess_inv @ jac_jac_x[:n_parms_aug + m, n_parms_aug + m:] def odr_chisquare_compact_y(d): - model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape)) - chisq = anp.sum(((d[n_parms + m:] 
- model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2) + model = func_aug(d[:n_parms_aug], d[n_parms_aug:n_parms_aug + m].reshape(x_shape)) + chisq = anp.sum(((d[n_parms_aug + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms_aug:n_parms_aug + m].reshape(x_shape)) / dx_f) ** 2) return chisq - jac_jac_y = jacobian(jacobian(odr_chisquare_compact_y))(np.concatenate((out.beta, out.xplus.ravel(), y_f))) + jac_jac_y = jacobian(jacobian(odr_chisquare_compact_y))(np.concatenate((fitp, out.xplus.ravel(), y_f))) - deriv_y = -hess_inv @ jac_jac_y[:n_parms + m, n_parms + m:] + deriv_y = -hess_inv @ jac_jac_y[:n_parms_aug + m, n_parms_aug + m:] result = [] for i in range(n_parms): - result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(out.beta[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(x.ravel()) + list(y), man_grad=[0] + list(deriv_x[i]) + list(deriv_y[i]))) + result.append(derived_observable(lambda my_var, **kwargs: (my_var[0] + np.finfo(np.float64).eps) / (x.ravel()[0].value + np.finfo(np.float64).eps) * out.beta[i], list(x.ravel()) + list(y), man_grad=list(deriv_x[i]) + list(deriv_y[i]))) - output.fit_parameters = result + output.fit_parameters = result + const_par output.odr_chisquare = odr_chisquare(np.concatenate((out.beta, out.xplus.ravel()))) output.dof = x.shape[-1] - n_parms @@ -296,9 +320,6 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): output.fit_function = func - if Obs.e_tag_global < 4: - warnings.warn("e_tag_global is smaller than 4, this can cause problems when calculating errors from fits with priors", RuntimeWarning) - x = np.asarray(x) if not callable(func): @@ -307,7 +328,7 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): for i in range(100): try: func(np.arange(i), 0) - except: + except Exception: pass else: break @@ -331,7 +352,7 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): loc_priors.append(i_prior) else: loc_val, loc_dval = extract_val_and_dval(i_prior) - loc_priors.append(pseudo_Obs(loc_val, loc_dval, 'p' + str(i_n))) + loc_priors.append(cov_Obs(loc_val, loc_dval ** 2, '#prior' + str(i_n) + f"_{np.random.randint(2147483647):010d}")) output.priors = loc_priors @@ -365,13 +386,15 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): if not silent: print('Method: migrad') - m = iminuit.Minuit.from_array_func(chisqfunc, x0, error=np.asarray(x0) * 0.01, errordef=1, print_level=0) + m = iminuit.Minuit(chisqfunc, x0) + m.errordef = 1 + m.print_level = 0 if 'tol' in kwargs: m.tol = kwargs.get('tol') else: m.tol = 1e-4 m.migrad() - params = np.asarray(m.values.values()) + params = np.asarray(m.values) output.chisquare_by_dof = m.fval / len(x) @@ -380,7 +403,7 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): if not silent: print('chisquare/d.o.f.:', output.chisquare_by_dof) - if not m.get_fmin().is_valid: + if not m.fmin.is_valid: raise Exception('The minimization procedure did not converge.') hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(params)) @@ -396,7 +419,7 @@ def _prior_fit(x, y, func, priors, silent=False, **kwargs): result = [] for i in range(n_parms): - result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(params[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(y) + list(loc_priors), man_grad=[0] + list(deriv[i]))) + result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * params[i], list(y) + list(loc_priors), 
man_grad=list(deriv[i]))) output.fit_parameters = result output.chisquare = chisqfunc(np.asarray(params)) @@ -432,10 +455,21 @@ def _standard_fit(x, y, func, silent=False, **kwargs): if not callable(func): raise TypeError('func has to be a function.') + func_aug = func + if 'const_par' in kwargs: + const_par = kwargs['const_par'] + if isinstance(const_par, Obs): + const_par = [const_par] + + def func(p, x): + return func_aug(np.concatenate((p, [o.value for o in const_par])), x) + else: + const_par = [] + for i in range(25): try: func(np.arange(i), x.T[0]) - except: + except Exception: pass else: break @@ -444,6 +478,8 @@ def _standard_fit(x, y, func, silent=False, **kwargs): if not silent: print('Fit with', n_parms, 'parameters') + if(len(const_par) > 0): + print('\t and %d constrained parameter%s' % (len(const_par), 's' if len(const_par) > 1 else ''), const_par) y_f = [o.value for o in y] dy_f = [o.dvalue for o in y] @@ -454,14 +490,44 @@ def _standard_fit(x, y, func, silent=False, **kwargs): if 'initial_guess' in kwargs: x0 = kwargs.get('initial_guess') if len(x0) != n_parms: - raise Exception('Initial guess does not have the correct length.') + raise Exception('Initial guess does not have the correct length: %d vs. %d' % (len(x0), n_parms)) else: x0 = [0.1] * n_parms - def chisqfunc(p): - model = func(p, x) - chisq = anp.sum(((y_f - model) / dy_f) ** 2) - return chisq + if kwargs.get('correlated_fit') is True: + cov = covariance_matrix(y) + covdiag = np.diag(1. / np.sqrt(np.diag(cov))) + corr = np.copy(cov) + for i in range(len(y)): + for j in range(len(y)): + corr[i][j] = cov[i][j] / np.sqrt(cov[i][i] * cov[j][j]) + condn = np.linalg.cond(corr) + if condn > 1e4: + warnings.warn("Correlation matrix may be ill-conditioned! condition number: %1.2e" % (condn), RuntimeWarning) + chol = np.linalg.cholesky(corr) + chol_inv = np.linalg.inv(chol) + chol_inv = np.dot(chol_inv, covdiag) + + def chisqfunc(p): + model = func(p, x) + chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2) + return chisq + + def chisqfunc_aug(p): + model = func_aug(np.concatenate((p, [o.value for o in const_par])), x) + chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2) + return chisq + + else: + def chisqfunc(p): + model = func(p, x) + chisq = anp.sum(((y_f - model) / dy_f) ** 2) + return chisq + + def chisqfunc_aug(p): + model = func_aug(np.concatenate((p, [o.value for o in const_par])), x) + chisq = anp.sum(((y_f - model) / dy_f) ** 2) + return chisq if 'method' in kwargs: output.method = kwargs.get('method') @@ -482,10 +548,17 @@ def _standard_fit(x, y, func, silent=False, **kwargs): if not silent: print('Method: Levenberg-Marquardt') - def chisqfunc_residuals(p): - model = func(p, x) - chisq = ((y_f - model) / dy_f) - return chisq + if kwargs.get('correlated_fit') is True: + def chisqfunc_residuals(p): + model = func(p, x) + chisq = anp.dot(chol_inv, (y_f - model)) + return chisq + + else: + def chisqfunc_residuals(p): + model = func(p, x) + chisq = ((y_f - model) / dy_f) + return chisq fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15) @@ -507,32 +580,42 @@ def _standard_fit(x, y, func, silent=False, **kwargs): print('chisquare/d.o.f.:', output.chisquare_by_dof) if kwargs.get('expected_chisquare') is True: - W = np.diag(1 / np.asarray(dy_f)) - cov = covariance_matrix(y) - A = W @ jacobian(func)(fit_result.x, x) - P_phi = A @ np.linalg.inv(A.T @ A) @ A.T - expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W) - 
output.chisquare_by_expected_chisquare = chisquare / expected_chisquare - if not silent: - print('chisquare/expected_chisquare:', - output.chisquare_by_expected_chisquare) + if kwargs.get('correlated_fit') is not True: + W = np.diag(1 / np.asarray(dy_f)) + cov = covariance_matrix(y) + A = W @ jacobian(func)(fit_result.x, x) + P_phi = A @ np.linalg.inv(A.T @ A) @ A.T + expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W) + output.chisquare_by_expected_chisquare = chisquare / expected_chisquare + if not silent: + print('chisquare/expected_chisquare:', + output.chisquare_by_expected_chisquare) - hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(fit_result.x)) + fitp = np.concatenate((fit_result.x, [o.value for o in const_par])) + hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc_aug))(fitp)) - def chisqfunc_compact(d): - model = func(d[:n_parms], x) - chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2) - return chisq + n_parms_aug = n_parms + len(const_par) + if kwargs.get('correlated_fit') is True: + def chisqfunc_compact(d): + model = func_aug(d[:n_parms_aug], x) + chisq = anp.sum(anp.dot(chol_inv, (d[n_parms_aug:] - model)) ** 2) + return chisq - jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((fit_result.x, y_f))) + else: + def chisqfunc_compact(d): + model = func_aug(d[:n_parms_aug], x) + chisq = anp.sum(((d[n_parms_aug:] - model) / dy_f) ** 2) + return chisq - deriv = -hess_inv @ jac_jac[:n_parms, n_parms:] + jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((fitp, y_f))) + + deriv = -hess_inv @ jac_jac[:n_parms_aug, n_parms_aug:] result = [] for i in range(n_parms): - result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(fit_result.x[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(y), man_grad=[0] + list(deriv[i]))) + result.append(derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (y[0].value + np.finfo(np.float64).eps) * fit_result.x[i], list(y), man_grad=list(deriv[i]))) - output.fit_parameters = result + output.fit_parameters = result + const_par output.chisquare = chisqfunc(fit_result.x) output.dof = x.shape[-1] - n_parms @@ -564,10 +647,10 @@ def fit_lin(x, y, **kwargs): return y if all(isinstance(n, Obs) for n in x): - out = odr_fit(x, y, f, **kwargs) + out = total_least_squares(x, y, f, **kwargs) return out.fit_parameters elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray): - out = standard_fit(x, y, f, **kwargs) + out = least_squares(x, y, f, **kwargs) return out.fit_parameters else: raise Exception('Unsupported types for x') @@ -596,7 +679,7 @@ def qqplot(x, o_y, func, p): plt.xlabel('Theoretical quantiles') plt.ylabel('Ordered Values') plt.legend() - plt.show() + plt.draw() def residual_plot(x, y, func, fit_res): @@ -625,7 +708,7 @@ def residual_plot(x, y, func, fit_res): ax1.set_xlim([xstart, xstop]) ax1.set_ylabel('Residuals') plt.subplots_adjust(wspace=None, hspace=None) - plt.show() + plt.draw() def covariance_matrix(y): @@ -657,150 +740,3 @@ def error_band(x, func, beta): err = np.array(err) return err - - -def ks_test(obs=None): - """Performs a Kolmogorov–Smirnov test for the Q-values of all fit object. - - If no list is given all Obs in memory are used. - - Disclaimer: The determination of the individual Q-values as well as this function have not been tested yet. 
- """ - - raise Exception('Not yet implemented') - - if obs is None: - obs_list = [] - for obj in gc.get_objects(): - if isinstance(obj, Obs): - obs_list.append(obj) - else: - obs_list = obs - - # TODO: Rework to apply to Q-values of all fits in memory - Qs = [] - for obs_i in obs_list: - for ens in obs_i.e_names: - if obs_i.e_Q[ens] is not None: - Qs.append(obs_i.e_Q[ens]) - - bins = len(Qs) - x = np.arange(0, 1.001, 0.001) - plt.plot(x, x, 'k', zorder=1) - plt.xlim(0, 1) - plt.ylim(0, 1) - plt.xlabel('Q value') - plt.ylabel('Cumulative probability') - plt.title(str(bins) + ' Q values') - - n = np.arange(1, bins + 1) / np.float64(bins) - Xs = np.sort(Qs) - plt.step(Xs, n) - diffs = n - Xs - loc_max_diff = np.argmax(np.abs(diffs)) - loc = Xs[loc_max_diff] - plt.annotate(s='', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0)) - plt.show() - - print(scipy.stats.kstest(Qs, 'uniform')) - - -def fit_general(x, y, func, silent=False, **kwargs): - """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters. - - Plausibility of the results should be checked. To control the numerical differentiation - the kwargs of numdifftools.step_generators.MaxStepGenerator can be used. - - func has to be of the form - - def func(a, x): - y = a[0] + a[1] * x + a[2] * np.sinh(x) - return y - - y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit. - x can either be a list of floats in which case no xerror is assumed, or - a list of Obs, where the dvalues of the Obs are used as xerror for the fit. - - Keyword arguments - ----------------- - silent -- If true all output to the console is omitted (default False). - initial_guess -- can provide an initial guess for the input parameters. Relevant for non-linear fits - with many parameters. 
- """ - - warnings.warn("New fit functions with exact error propagation are now available as alternative.", DeprecationWarning) - - if not callable(func): - raise TypeError('func has to be a function.') - - for i in range(10): - try: - func(np.arange(i), 0) - except: - pass - else: - break - n_parms = i - if not silent: - print('Fit with', n_parms, 'parameters') - - global print_output, beta0 - print_output = 1 - if 'initial_guess' in kwargs: - beta0 = kwargs.get('initial_guess') - if len(beta0) != n_parms: - raise Exception('Initial guess does not have the correct length.') - else: - beta0 = np.arange(n_parms) - - if len(x) != len(y): - raise Exception('x and y have to have the same length') - - if all(isinstance(n, Obs) for n in x): - obs = x + y - x_constants = None - xerr = [o.dvalue for o in x] - yerr = [o.dvalue for o in y] - elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray): - obs = y - x_constants = x - xerr = None - yerr = [o.dvalue for o in y] - else: - raise Exception('Unsupported types for x') - - def do_the_fit(obs, **kwargs): - - global print_output, beta0 - - func = kwargs.get('function') - yerr = kwargs.get('yerr') - length = len(yerr) - - xerr = kwargs.get('xerr') - - if length == len(obs): - assert 'x_constants' in kwargs - data = RealData(kwargs.get('x_constants'), obs, sy=yerr) - fit_type = 2 - elif length == len(obs) // 2: - data = RealData(obs[:length], obs[length:], sx=xerr, sy=yerr) - fit_type = 0 - else: - raise Exception('x and y do not fit together.') - - model = Model(func) - - odr = ODR(data, model, beta0, partol=np.finfo(np.float64).eps) - odr.set_job(fit_type=fit_type, deriv=1) - output = odr.run() - if print_output and not silent: - print(*output.stopreason) - print('chisquare/d.o.f.:', output.res_var) - print_output = 0 - beta0 = output.beta - return output.beta[kwargs.get('n')] - res = [] - for n in range(n_parms): - res.append(derived_observable(do_the_fit, obs, function=func, xerr=xerr, yerr=yerr, x_constants=x_constants, num_grad=True, n=n, **kwargs)) - return res diff --git a/pyerrors/input/__init__.py b/pyerrors/input/__init__.py index 24766e77..2797841c 100644 --- a/pyerrors/input/__init__.py +++ b/pyerrors/input/__init__.py @@ -1,5 +1,6 @@ from . import bdio from . import hadrons -from . import sfcf -from . import openQCD +from . import json from . import misc +from . import openQCD +from . 
import sfcf diff --git a/pyerrors/input/bdio.py b/pyerrors/input/bdio.py index 3e29924b..0a15cceb 100644 --- a/pyerrors/input/bdio.py +++ b/pyerrors/input/bdio.py @@ -226,7 +226,7 @@ def write_ADerrors(obs_list, file_path, bdio_path='./libbdio.so', **kwargs): for key in keys: try: # Try to convert key to integer ids.append(int(key)) - except: # If not possible construct a hash + except Exception: # If not possible construct a hash ids.append(int(hashlib.sha256(key.encode('utf-8')).hexdigest(), 16) % 10 ** 8) print('ids', ids) nt = [] diff --git a/pyerrors/input/hadrons.py b/pyerrors/input/hadrons.py index 5f3d2646..efe4feb1 100644 --- a/pyerrors/input/hadrons.py +++ b/pyerrors/input/hadrons.py @@ -1,25 +1,15 @@ -#!/usr/bin/env python -# coding: utf-8 - import os import h5py import numpy as np from ..obs import Obs, CObs from ..correlators import Corr -from ..npr import Npr_matrix -def _get_files(path, filestem): - ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(filenames) - break +def _get_files(path, filestem, idl): + ls = os.listdir(path) # Clean up file list - files = [] - for line in ls: - if line.startswith(filestem): - files.append(line) + files = list(filter(lambda x: x.startswith(filestem), ls)) if not files: raise Exception('No files starting with', filestem, 'in folder', path) @@ -30,18 +20,31 @@ def _get_files(path, filestem): # Sort according to configuration number files.sort(key=get_cnfg_number) - # Check that configurations are evenly spaced cnfg_numbers = [] + filtered_files = [] for line in files: - cnfg_numbers.append(get_cnfg_number(line)) + no = get_cnfg_number(line) + if idl: + if no in list(idl): + filtered_files.append(line) + cnfg_numbers.append(no) + else: + filtered_files.append(line) + cnfg_numbers.append(no) - if not all(np.diff(cnfg_numbers) == np.diff(cnfg_numbers)[0]): + # Check that configurations are evenly spaced + dc = np.unique(np.diff(cnfg_numbers)) + if np.any(dc < 0): + raise Exception("Unsorted files") + if len(dc) == 1: + idx = range(cnfg_numbers[0], cnfg_numbers[-1] + dc[0], dc[0]) + else: raise Exception('Configurations are not evenly spaced.') - return files, cnfg_numbers + return filtered_files, idx -def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): +def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson', idl=None): """Read hadrons meson hdf5 file and extract the meson labeled 'meson' Parameters @@ -59,9 +62,11 @@ def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): Label of the upmost directory in the hdf5 file, default 'meson' for outputs of the Meson module. Can be altered to read input from other modules with similar structures. + idl : range + If specified only configurations in the given range are read in. 
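A usage sketch for the new `idl` filtering (the path, file stem and ensemble id are placeholders):

```python
import pyerrors.input.hadrons as hadrons

# Read only configurations 100, 104, ..., 196 from files
# /data/meson_<cnfg>.h5 written by the Hadrons Meson module
corr = hadrons.read_meson_hd5('/data', 'meson', 'A654',
                              meson='meson_0', idl=range(100, 200, 4))
corr.gamma_method()
```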
""" - files, cnfg_numbers = _get_files(path, filestem) + files, idx = _get_files(path, filestem, idl) corr_data = [] infos = [] @@ -78,27 +83,69 @@ def read_meson_hd5(path, filestem, ens_id, meson='meson_0', tree='meson'): l_obs = [] for c in corr_data.T: - l_obs.append(Obs([c], [ens_id], idl=[cnfg_numbers])) + l_obs.append(Obs([c], [ens_id], idl=[idx])) corr = Corr(l_obs) corr.tag = r", ".join(infos) return corr -def read_ExternalLeg_hd5(path, filestem, ens_id, order='F'): +class Npr_matrix(np.ndarray): + + def __new__(cls, input_array, mom_in=None, mom_out=None): + obj = np.asarray(input_array).view(cls) + obj.mom_in = mom_in + obj.mom_out = mom_out + return obj + + @property + def g5H(self): + """Gamma_5 hermitean conjugate + + Uses the fact that the propagator is gamma5 hermitean, so just the + in and out momenta of the propagator are exchanged. + """ + return Npr_matrix(self, + mom_in=self.mom_out, + mom_out=self.mom_in) + + def _propagate_mom(self, other, name): + s_mom = getattr(self, name, None) + o_mom = getattr(other, name, None) + if s_mom is not None and o_mom is not None: + if not np.allclose(s_mom, o_mom): + raise Exception(name + ' does not match.') + return o_mom if o_mom is not None else s_mom + + def __matmul__(self, other): + return self.__new__(Npr_matrix, + super().__matmul__(other), + self._propagate_mom(other, 'mom_in'), + self._propagate_mom(other, 'mom_out')) + + def __array_finalize__(self, obj): + if obj is None: + return + self.mom_in = getattr(obj, 'mom_in', None) + self.mom_out = getattr(obj, 'mom_out', None) + + +def read_ExternalLeg_hd5(path, filestem, ens_id, idl=None): """Read hadrons ExternalLeg hdf5 file and output an array of CObs Parameters - ----------------- - path -- path to the files to read - filestem -- namestem of the files to read - ens_id -- name of the ensemble, required for internal bookkeeping - order -- order in which the array is to be reshaped, - 'F' for the first index changing fastest (9 4x4 matrices) default. - 'C' for the last index changing fastest (16 3x3 matrices), + ---------- + path : str + path to the files to read + filestem : str + namestem of the files to read + ens_id : str + name of the ensemble, required for internal bookkeeping + idl : range + If specified only configurations in the given range are read in. 
""" - files, cnfg_numbers = _get_files(path, filestem) + files, idx = _get_files(path, filestem, idl) mom = None @@ -107,9 +154,7 @@ def read_ExternalLeg_hd5(path, filestem, ens_id, order='F'): file = h5py.File(path + '/' + hd5_file, "r") raw_data = file['ExternalLeg/corr'][0][0].view('complex') corr_data.append(raw_data) - if mom is not None: - assert np.allclose(mom, np.array(str(file['ExternalLeg/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int)) - else: + if mom is None: mom = np.array(str(file['ExternalLeg/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int) file.close() corr_data = np.array(corr_data) @@ -118,28 +163,29 @@ def read_ExternalLeg_hd5(path, filestem, ens_id, order='F'): matrix = np.empty((rolled_array.shape[:-1]), dtype=object) for si, sj, ci, cj in np.ndindex(rolled_array.shape[:-1]): - real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[cnfg_numbers]) - imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[cnfg_numbers]) + real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[idx]) + imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[idx]) matrix[si, sj, ci, cj] = CObs(real, imag) - matrix[si, sj, ci, cj].gamma_method() - return Npr_matrix(matrix.swapaxes(1, 2).reshape((12, 12), order=order), mom_in=mom) + return Npr_matrix(matrix, mom_in=mom) -def read_Bilinear_hd5(path, filestem, ens_id, order='F'): +def read_Bilinear_hd5(path, filestem, ens_id, idl=None): """Read hadrons Bilinear hdf5 file and output an array of CObs Parameters - ----------------- - path -- path to the files to read - filestem -- namestem of the files to read - ens_id -- name of the ensemble, required for internal bookkeeping - order -- order in which the array is to be reshaped, - 'F' for the first index changing fastest (9 4x4 matrices) default. - 'C' for the last index changing fastest (16 3x3 matrices), + ---------- + path : str + path to the files to read + filestem : str + namestem of the files to read + ens_id : str + name of the ensemble, required for internal bookkeeping + idl : range + If specified only configurations in the given range are read in. 
""" - files, cnfg_numbers = _get_files(path, filestem) + files, idx = _get_files(path, filestem, idl) mom_in = None mom_out = None @@ -153,13 +199,9 @@ def read_Bilinear_hd5(path, filestem, ens_id, order='F'): corr_data[name] = [] raw_data = file['Bilinear/Bilinear_' + str(i) + '/corr'][0][0].view('complex') corr_data[name].append(raw_data) - if mom_in is not None: - assert np.allclose(mom_in, np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int)) - else: + if mom_in is None: mom_in = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int) - if mom_out is not None: - assert np.allclose(mom_out, np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int)) - else: + if mom_out is None: mom_out = np.array(str(file['Bilinear/Bilinear_' + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int) file.close() @@ -173,11 +215,117 @@ def read_Bilinear_hd5(path, filestem, ens_id, order='F'): matrix = np.empty((rolled_array.shape[:-1]), dtype=object) for si, sj, ci, cj in np.ndindex(rolled_array.shape[:-1]): - real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[cnfg_numbers]) - imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[cnfg_numbers]) + real = Obs([rolled_array[si, sj, ci, cj].real], [ens_id], idl=[idx]) + imag = Obs([rolled_array[si, sj, ci, cj].imag], [ens_id], idl=[idx]) matrix[si, sj, ci, cj] = CObs(real, imag) - matrix[si, sj, ci, cj].gamma_method() - result_dict[key] = Npr_matrix(matrix.swapaxes(1, 2).reshape((12, 12), order=order), mom_in=mom_in, mom_out=mom_out) + result_dict[key] = Npr_matrix(matrix, mom_in=mom_in, mom_out=mom_out) return result_dict + + +def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]): + """Read hadrons FourquarkFullyConnected hdf5 file and output an array of CObs + + Parameters + ---------- + path : str + path to the files to read + filestem : str + namestem of the files to read + ens_id : str + name of the ensemble, required for internal bookkeeping + idl : range + If specified only configurations in the given range are read in. + vertices : list + Vertex functions to be extracted. 
+ """ + + files, idx = _get_files(path, filestem, idl) + + mom_in = None + mom_out = None + + vertex_names = [] + for vertex in vertices: + vertex_names += _get_lorentz_names(vertex) + + corr_data = {} + + tree = 'FourQuarkFullyConnected/FourQuarkFullyConnected_' + + for hd5_file in files: + file = h5py.File(path + '/' + hd5_file, "r") + + for i in range(32): + name = (file[tree + str(i) + '/info'].attrs['gammaA'][0].decode('UTF-8'), file[tree + str(i) + '/info'].attrs['gammaB'][0].decode('UTF-8')) + if name in vertex_names: + if name not in corr_data: + corr_data[name] = [] + raw_data = file[tree + str(i) + '/corr'][0][0].view('complex') + corr_data[name].append(raw_data) + if mom_in is None: + mom_in = np.array(str(file[tree + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int) + if mom_out is None: + mom_out = np.array(str(file[tree + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int) + + file.close() + + intermediate_dict = {} + + for vertex in vertices: + lorentz_names = _get_lorentz_names(vertex) + for v_name in lorentz_names: + if vertex not in intermediate_dict: + intermediate_dict[vertex] = np.array(corr_data[v_name]) + else: + intermediate_dict[vertex] += np.array(corr_data[v_name]) + + result_dict = {} + + for key, data in intermediate_dict.items(): + + rolled_array = np.moveaxis(data, 0, 8) + + matrix = np.empty((rolled_array.shape[:-1]), dtype=object) + for index in np.ndindex(rolled_array.shape[:-1]): + real = Obs([rolled_array[index].real], [ens_id], idl=[idx]) + imag = Obs([rolled_array[index].imag], [ens_id], idl=[idx]) + matrix[index] = CObs(real, imag) + + result_dict[key] = Npr_matrix(matrix, mom_in=mom_in, mom_out=mom_out) + + return result_dict + + +def _get_lorentz_names(name): + assert len(name) == 2 + + res = [] + + if not set(name) <= set(['S', 'P', 'V', 'A', 'T']): + raise Exception("Name can only contain 'S', 'P', 'V', 'A' or 'T'") + + if 'S' in name or 'P' in name: + if not set(name) <= set(['S', 'P']): + raise Exception("'" + name + "' is not a Lorentz scalar") + + g_names = {'S': 'Identity', + 'P': 'Gamma5'} + + res.append((g_names[name[0]], g_names[name[1]])) + + elif 'T' in name: + if not set(name) <= set(['T']): + raise Exception("'" + name + "' is not a Lorentz scalar") + raise Exception("Tensor operators not yet implemented.") + else: + if not set(name) <= set(['V', 'A']): + raise Exception("'" + name + "' is not a Lorentz scalar") + lorentz_index = ['X', 'Y', 'Z', 'T'] + + for ind in lorentz_index: + res.append(('Gamma' + ind + (name[0] == 'A') * 'Gamma5', + 'Gamma' + ind + (name[1] == 'A') * 'Gamma5')) + + return res diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py new file mode 100644 index 00000000..131dccf3 --- /dev/null +++ b/pyerrors/input/json.py @@ -0,0 +1,453 @@ +import json +import gzip +import numpy as np +import getpass +import socket +import datetime +import platform +import warnings +from ..obs import Obs +from ..covobs import Covobs +from .. import version as pyerrorsversion + + +def create_json_string(ol, description='', indent=1): + """Generate the string for the export of a list of Obs or structures containing Obs + to a .json(.gz) file + + Parameters + ---------- + ol : list + List of objects that will be exported. At the moments, these objects can be + either of: Obs, list, numpy.ndarray. + All Obs inside a structure have to be defined on the same set of configurations. + description : str + Optional string that describes the contents of the json file. 
+ indent : int + Specify the indentation level of the json file. None or 0 is permissible and + saves disk space. + """ + + def _default(self, obj): + return str(obj) + my_encoder = json.JSONEncoder + _default.default = json.JSONEncoder().default + my_encoder.default = _default + + class Deltalist: + def __init__(self, li): + self.cnfg = li[0] + self.deltas = li[1:] + + def __repr__(self): + s = '[%d' % (self.cnfg) + for d in self.deltas: + s += ', %1.15e' % (d) + s += ']' + return s + + def __str__(self): + return self.__repr__() + + class Floatlist: + def __init__(self, li): + self.li = list(li) + + def __repr__(self): + s = '[' + for i in range(len(self.li)): + if i > 0: + s += ', ' + s += '%1.15e' % (self.li[i]) + s += ']' + return s + + def __str__(self): + return self.__repr__() + + def _gen_data_d_from_list(ol): + dl = [] + for name in ol[0].mc_names: + ed = {} + ed['id'] = name + ed['replica'] = [] + for r_name in ol[0].e_content[name]: + rd = {} + rd['name'] = r_name + if ol[0].is_merged.get(r_name, False): + rd['is_merged'] = True + rd['deltas'] = [] + for i in range(len(ol[0].idl[r_name])): + rd['deltas'].append([ol[0].idl[r_name][i]]) + for o in ol: + rd['deltas'][-1].append(o.deltas[r_name][i]) + rd['deltas'][-1] = Deltalist(rd['deltas'][-1]) + ed['replica'].append(rd) + dl.append(ed) + return dl + + def _gen_cdata_d_from_list(ol): + dl = [] + for name in ol[0].cov_names: + ed = {} + ed['id'] = name + ed['layout'] = str(ol[0].covobs[name].cov.shape).lstrip('(').rstrip(')').rstrip(',') + ed['cov'] = Floatlist(np.ravel(ol[0].covobs[name].cov)) + ncov = ol[0].covobs[name].cov.shape[0] + ed['grad'] = [] + for i in range(ncov): + ed['grad'].append([]) + for o in ol: + ed['grad'][-1].append(o.covobs[name].grad[i][0]) + ed['grad'][-1] = Floatlist(ed['grad'][-1]) + dl.append(ed) + return dl + + def _assert_equal_properties(ol, otype=Obs): + for o in ol: + if not isinstance(o, otype): + raise Exception("Wrong data type in list.") + for o in ol[1:]: + if not ol[0].is_merged == o.is_merged: + raise Exception("All Obs in list have to be defined on the same set of configs.") + if not ol[0].reweighted == o.reweighted: + raise Exception("All Obs in list have to have the same property 'reweighted'.") + if not ol[0].e_content == o.e_content: + raise Exception("All Obs in list have to be defined on the same set of configs.") + if not ol[0].idl == o.idl: + raise Exception("All Obs in list have to be defined on the same set of configurations.") + + def write_Obs_to_dict(o): + d = {} + d['type'] = 'Obs' + d['layout'] = '1' + if o.tag: + d['tag'] = [o.tag] + if o.reweighted: + d['reweighted'] = o.reweighted + d['value'] = [o.value] + data = _gen_data_d_from_list([o]) + if len(data) > 0: + d['data'] = data + cdata = _gen_cdata_d_from_list([o]) + if len(cdata) > 0: + d['cdata'] = cdata + return d + + def write_List_to_dict(ol): + _assert_equal_properties(ol) + d = {} + d['type'] = 'List' + d['layout'] = '%d' % len(ol) + taglist = [o.tag for o in ol] + if np.any([tag is not None for tag in taglist]): + d['tag'] = taglist + if ol[0].reweighted: + d['reweighted'] = ol[0].reweighted + d['value'] = [o.value for o in ol] + data = _gen_data_d_from_list(ol) + if len(data) > 0: + d['data'] = data + cdata = _gen_cdata_d_from_list(ol) + if len(cdata) > 0: + d['cdata'] = cdata + return d + + def write_Array_to_dict(oa): + ol = np.ravel(oa) + _assert_equal_properties(ol) + d = {} + d['type'] = 'Array' + d['layout'] = str(oa.shape).lstrip('(').rstrip(')').rstrip(',') + taglist = [o.tag for o in ol] + if 
np.any([tag is not None for tag in taglist]): + d['tag'] = taglist + if ol[0].reweighted: + d['reweighted'] = ol[0].reweighted + d['value'] = [o.value for o in ol] + data = _gen_data_d_from_list(ol) + if len(data) > 0: + d['data'] = data + cdata = _gen_cdata_d_from_list(ol) + if len(cdata) > 0: + d['cdata'] = cdata + return d + + if not isinstance(ol, list): + ol = [ol] + + d = {} + d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__) + d['version'] = '0.1' + d['who'] = getpass.getuser() + d['date'] = datetime.datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %z') + d['host'] = socket.gethostname() + ', ' + platform.platform() + + if description: + d['description'] = description + d['obsdata'] = [] + for io in ol: + if isinstance(io, Obs): + d['obsdata'].append(write_Obs_to_dict(io)) + elif isinstance(io, list): + d['obsdata'].append(write_List_to_dict(io)) + elif isinstance(io, np.ndarray): + d['obsdata'].append(write_Array_to_dict(io)) + + jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False) + + def remove_quotationmarks(s): + """Workaround for un-quoting of delta lists, adds 5% of work + but is safe, compared to a simple replace that could destroy the structure + """ + deltas = False + split = s.split('\n') + for i in range(len(split)): + if '"deltas":' in split[i] or '"cov":' in split[i] or '"grad":' in split[i]: + deltas = True + if deltas: + split[i] = split[i].replace('"[', '[').replace(']"', ']') + if split[i][-1] == ']': + deltas = False + return '\n'.join(split) + + jsonstring = remove_quotationmarks(jsonstring) + return jsonstring + + +def dump_to_json(ol, fname, description='', indent=1, gz=True): + """Export a list of Obs or structures containing Obs to a .json(.gz) file + + Parameters + ---------- + ol : list + List of objects that will be exported. At the moment, these objects can be + any of: Obs, list, numpy.ndarray. + All Obs inside a structure have to be defined on the same set of configurations. + fname : str + Filename of the output file. + description : str + Optional string that describes the contents of the json file. + indent : int + Specify the indentation level of the json file. None or 0 is permissible and + saves disk space. + gz : bool + If True, the output is a gzipped json. If False, the output is a json file. + """ + + jsonstring = create_json_string(ol, description, indent) + + if not fname.endswith('.json') and not fname.endswith('.gz'): + fname += '.json' + + if gz: + if not fname.endswith('.gz'): + fname += '.gz' + + fp = gzip.open(fname, 'wb') + fp.write(jsonstring.encode('utf-8')) + else: + fp = open(fname, 'w', encoding='utf-8') + fp.write(jsonstring) + fp.close() + + +def import_json_string(json_string, verbose=True, full_output=False): + """Reconstruct a list of Obs or structures containing Obs from a json string. + + The following structures are supported: Obs, list, numpy.ndarray + If the list contains only one element, it is unpacked from the list. + + Parameters + ---------- + json_string : str + json string containing the data. + verbose : bool + Print additional information that was written to the file. + full_output : bool + If True, a dict containing auxiliary information and the data is returned. + If False, only the data is returned.
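As a usage sketch for the json interface introduced in this file (an illustration, not part of the patch; it assumes the module is importable as `pyerrors.input.json` and uses `pseudo_Obs`, which appears later in this diff):
```python
import pyerrors as pe
from pyerrors.input import json as pejson

# Generate a test observable and round-trip it through the json string format
obs = pe.pseudo_Obs(0.5, 0.01, 'ens1')

json_string = pejson.create_json_string(obs, description='round-trip test')
reconstructed = pejson.import_json_string(json_string, verbose=False)

# File based variant: writes 'test_file.json.gz' and reads it back
pejson.dump_to_json(obs, 'test_file', description='round-trip test', gz=True)
from_file = pejson.load_json('test_file', verbose=False)
```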
+ """ + + def _gen_obsd_from_datad(d): + retd = {} + if d: + retd['names'] = [] + retd['idl'] = [] + retd['deltas'] = [] + retd['is_merged'] = {} + for ens in d: + for rep in ens['replica']: + retd['names'].append(rep['name']) + retd['idl'].append([di[0] for di in rep['deltas']]) + retd['deltas'].append(np.array([di[1:] for di in rep['deltas']])) + retd['is_merged'][rep['name']] = rep.get('is_merged', False) + return retd + + def _gen_covobsd_from_cdatad(d): + retd = {} + for ens in d: + retl = [] + name = ens['id'] + layouts = ens.get('layout', '1').strip() + layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0] + cov = np.reshape(ens['cov'], layout) + grad = ens['grad'] + nobs = len(grad[0]) + for i in range(nobs): + retl.append({'name': name, 'cov': cov, 'grad': [g[i] for g in grad]}) + retd[name] = retl + return retd + + def get_Obs_from_dict(o): + layouts = o.get('layout', '1').strip() + if layouts != '1': + raise Exception("layout is %s has to be 1 for type Obs." % (layouts), RuntimeWarning) + + values = o['value'] + od = _gen_obsd_from_datad(o.get('data', {})) + cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) + + if od: + ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl']) + ret.is_merged = od['is_merged'] + else: + ret = Obs([], []) + ret._value = values[0] + for name in cd: + co = cd[name][0] + ret._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) + ret.names.append(co['name']) + + ret.reweighted = o.get('reweighted', False) + ret.tag = o.get('tag', [None])[0] + return ret + + def get_List_from_dict(o): + layouts = o.get('layout', '1').strip() + layout = int(layouts) + values = o['value'] + od = _gen_obsd_from_datad(o.get('data', {})) + cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) + + ret = [] + taglist = o.get('tag', layout * [None]) + for i in range(layout): + if od: + ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])) + ret[-1].is_merged = od['is_merged'] + else: + ret.append(Obs([], [])) + ret[-1]._value = values[i] + print('Created Obs with means= ', values[i]) + for name in cd: + co = cd[name][i] + ret[-1]._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) + ret[-1].names.append(co['name']) + + ret[-1].reweighted = o.get('reweighted', False) + ret[-1].tag = taglist[i] + return ret + + def get_Array_from_dict(o): + layouts = o.get('layout', '1').strip() + layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0] + N = np.prod(layout) + values = o['value'] + od = _gen_obsd_from_datad(o.get('data', {})) + cd = _gen_covobsd_from_cdatad(o.get('cdata', {})) + + ret = [] + taglist = o.get('tag', N * [None]) + for i in range(N): + if od: + ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])) + ret[-1].is_merged = od['is_merged'] + else: + ret.append(Obs([], [])) + ret[-1]._value = values[i] + for name in cd: + co = cd[name][i] + ret[-1]._covobs[name] = Covobs(None, co['cov'], co['name'], grad=co['grad']) + ret[-1].names.append(co['name']) + ret[-1].reweighted = o.get('reweighted', False) + ret[-1].tag = taglist[i] + return np.reshape(ret, layout) + + json_dict = json.loads(json_string) + + prog = json_dict.get('program', '') + version = json_dict.get('version', '') + who = json_dict.get('who', '') + date = json_dict.get('date', '') + host = json_dict.get('host', '') + if prog and verbose: + print('Data has been written using %s.' 
% (prog)) + if version and verbose: + print('Format version %s' % (version)) + if np.any([who, date, host]) and verbose: + print('Written by %s on %s on host %s' % (who, date, host)) + description = json_dict.get('description', '') + if description and verbose: + print() + print('Description: ', description) + obsdata = json_dict['obsdata'] + ol = [] + for io in obsdata: + if io['type'] == 'Obs': + ol.append(get_Obs_from_dict(io)) + elif io['type'] == 'List': + ol.append(get_List_from_dict(io)) + elif io['type'] == 'Array': + ol.append(get_Array_from_dict(io)) + + if full_output: + retd = {} + retd['program'] = prog + retd['version'] = version + retd['who'] = who + retd['date'] = date + retd['host'] = host + retd['description'] = description + retd['obsdata'] = ol + + return retd + else: + if len(obsdata) == 1: + ol = ol[0] + + return ol + + +def load_json(fname, verbose=True, gz=True, full_output=False): + """Import a list of Obs or structures containing Obs from a .json.gz file. + + The following structures are supported: Obs, list, numpy.ndarray + If the list contains only one element, it is unpacked from the list. + + Parameters + ---------- + fname : str + Filename of the input file. + verbose : bool + Print additional information that was written to the file. + gz : bool + If True, assumes that data is gzipped. If False, assumes JSON file. + full_output : bool + If True, a dict containing auxiliary information and the data is returned. + If False, only the data is returned. + """ + if not fname.endswith('.json') and not fname.endswith('.gz'): + fname += '.json' + if gz: + if not fname.endswith('.gz'): + fname += '.gz' + with gzip.open(fname, 'r') as fin: + d = fin.read().decode('utf-8') + else: + if fname.endswith('.gz'): + warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) + with open(fname, 'r', encoding='utf-8') as fin: + d = fin.read() + + return import_json_string(d, verbose, full_output) diff --git a/pyerrors/linalg.py b/pyerrors/linalg.py index d993f291..6eac3e69 100644 --- a/pyerrors/linalg.py +++ b/pyerrors/linalg.py @@ -1,128 +1,18 @@ import numpy as np -from autograd import jacobian import autograd.numpy as anp # Thinly-wrapped numpy -from .obs import derived_observable, CObs, Obs, _merge_idx, _expand_deltas_for_merge, _filter_zeroes - -from functools import partial -from autograd.extend import defvjp - - -def derived_array(func, data, **kwargs): - """Construct a derived Obs for a matrix valued function according to func(data, **kwargs) using automatic differentiation. - - Parameters - ---------- - func : object - arbitrary function of the form func(data, **kwargs). For the - automatic differentiation to work, all numpy functions have to have - the autograd wrapper (use 'import autograd.numpy as anp'). - data : list - list of Obs, e.g. [obs1, obs2, obs3]. - man_grad : list - manually supply a list or an array which contains the jacobian - of func. Use cautiously, supplying the wrong derivative will - not be intercepted.
- """ - - data = np.asarray(data) - raveled_data = data.ravel() - - # Workaround for matrix operations containing non Obs data - for i_data in raveled_data: - if isinstance(i_data, Obs): - first_name = i_data.names[0] - first_shape = i_data.shape[first_name] - first_idl = i_data.idl[first_name] - break - - for i in range(len(raveled_data)): - if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl]) - - n_obs = len(raveled_data) - new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) - - is_merged = len(list(filter(lambda o: o.is_merged is True, raveled_data))) > 0 - reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 - new_idl_d = {} - for name in new_names: - idl = [] - for i_data in raveled_data: - tmp = i_data.idl.get(name) - if tmp is not None: - idl.append(tmp) - new_idl_d[name] = _merge_idx(idl) - if not is_merged: - is_merged = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) - - if data.ndim == 1: - values = np.array([o.value for o in data]) - else: - values = np.vectorize(lambda x: x.value)(data) - - new_values = func(values, **kwargs) - - new_r_values = {} - for name in new_names: - tmp_values = np.zeros(n_obs) - for i, item in enumerate(raveled_data): - tmp = item.r_values.get(name) - if tmp is None: - tmp = item.value - tmp_values[i] = tmp - tmp_values = np.array(tmp_values).reshape(data.shape) - new_r_values[name] = func(tmp_values, **kwargs) - - if 'man_grad' in kwargs: - deriv = np.asarray(kwargs.get('man_grad')) - if new_values.shape + data.shape != deriv.shape: - raise Exception('Manual derivative does not have correct shape.') - else: - deriv = jacobian(func)(values, **kwargs) - - final_result = np.zeros(new_values.shape, dtype=object) - - d_extracted = {} - for name in new_names: - d_extracted[name] = [] - for i_dat, dat in enumerate(data): - ens_length = len(new_idl_d[name]) - d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas[name], o.idl[name], o.shape[name], new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) - - for i_val, new_val in np.ndenumerate(new_values): - new_deltas = {} - for name in new_names: - ens_length = d_extracted[name][0].shape[-1] - new_deltas[name] = np.zeros(ens_length) - for i_dat, dat in enumerate(d_extracted[name]): - new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) - - new_samples = [] - new_means = [] - new_idl = [] - if is_merged: - filtered_names, filtered_deltas, filtered_idl_d = _filter_zeroes(new_names, new_deltas, new_idl_d) - else: - filtered_names = new_names - filtered_deltas = new_deltas - filtered_idl_d = new_idl_d - for name in filtered_names: - new_samples.append(filtered_deltas[name]) - new_means.append(new_r_values[name][i_val]) - new_idl.append(filtered_idl_d[name]) - final_result[i_val] = Obs(new_samples, filtered_names, means=new_means, idl=new_idl) - final_result[i_val]._value = new_val - final_result[i_val].is_merged = is_merged - final_result[i_val].reweighted = reweighted - - return final_result +from .obs import derived_observable, CObs, Obs, import_jackknife def matmul(*operands): """Matrix multiply all operands. - Supports real and complex valued matrices and is faster compared to - standard multiplication via the @ operator. + Parameters + ---------- + operands : numpy.ndarray + Arbitrary number of 2d-numpy arrays which can be real or complex + Obs valued. 
+ + This implementation is faster compared to standard multiplication via the @ operator. """ if any(isinstance(o[0, 0], CObs) for o in operands): extended_operands = [] @@ -152,8 +42,8 @@ def matmul(*operands): def multi_dot_i(operands): return multi_dot(operands, 'Imag') - Nr = derived_array(multi_dot_r, extended_operands) - Ni = derived_array(multi_dot_i, extended_operands) + Nr = derived_observable(multi_dot_r, extended_operands, array_mode=True) + Ni = derived_observable(multi_dot_i, extended_operands, array_mode=True) res = np.empty_like(Nr) for (n, m), entry in np.ndenumerate(Nr): @@ -166,7 +56,142 @@ def matmul(*operands): for op in operands[1:]: stack = stack @ op return stack - return derived_array(multi_dot, operands) + return derived_observable(multi_dot, operands, array_mode=True) + + +def jack_matmul(*operands): + """Matrix multiply all operands making use of the jackknife approximation. + + Parameters + ---------- + operands : numpy.ndarray + Arbitrary number of 2d-numpy arrays which can be real or complex + Obs valued. + + For large matrices this is considerably faster compared to matmul. + """ + + def _exp_to_jack(matrix): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = entry.export_jackknife() + return base_matrix + + def _imp_from_jack(matrix, name, idl): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = import_jackknife(entry, name, [idl]) + return base_matrix + + def _exp_to_jack_c(matrix): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = entry.real.export_jackknife() + 1j * entry.imag.export_jackknife() + return base_matrix + + def _imp_from_jack_c(matrix, name, idl): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = CObs(import_jackknife(entry.real, name, [idl]), + import_jackknife(entry.imag, name, [idl])) + return base_matrix + + if any(isinstance(o.flat[0], CObs) for o in operands): + name = operands[0].flat[0].real.names[0] + idl = operands[0].flat[0].real.idl[name] + + r = _exp_to_jack_c(operands[0]) + for op in operands[1:]: + if isinstance(op.flat[0], CObs): + r = r @ _exp_to_jack_c(op) + else: + r = r @ op + return _imp_from_jack_c(r, name, idl) + else: + name = operands[0].flat[0].names[0] + idl = operands[0].flat[0].idl[name] + + r = _exp_to_jack(operands[0]) + for op in operands[1:]: + if isinstance(op.flat[0], Obs): + r = r @ _exp_to_jack(op) + else: + r = r @ op + return _imp_from_jack(r, name, idl) + + +def einsum(subscripts, *operands): + """Wrapper for numpy.einsum + + Parameters + ---------- + subscripts : str + Subscripts for summation (see numpy documentation for details) + operands : numpy.ndarray + Arbitrary number of 2d-numpy arrays which can be real or complex + Obs valued.
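A hedged sketch of the jackknife based routines (same assumptions as the matmul example above; all entries live on a single ensemble, as required by the jackknife conversion):
```python
import numpy as np
import pyerrors as pe

dim = 8
matrix = np.array([[pe.pseudo_Obs(1.0 + 0.1 * i + 0.05 * j, 0.01, 'ens1')
                    for j in range(dim)] for i in range(dim)])

# Jackknife approximation: much faster than matmul for large matrices
product = pe.linalg.jack_matmul(matrix, matrix)

# einsum wrapper, here used to compute the trace of matrix @ matrix
trace = pe.linalg.einsum('ij,ji->', matrix, matrix)
```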
+ """ + + def _exp_to_jack(matrix): + base_matrix = [] + for index, entry in np.ndenumerate(matrix): + base_matrix.append(entry.export_jackknife()) + return np.asarray(base_matrix).reshape(matrix.shape + base_matrix[0].shape) + + def _exp_to_jack_c(matrix): + base_matrix = [] + for index, entry in np.ndenumerate(matrix): + base_matrix.append(entry.real.export_jackknife() + 1j * entry.imag.export_jackknife()) + return np.asarray(base_matrix).reshape(matrix.shape + base_matrix[0].shape) + + def _imp_from_jack(matrix, name, idl): + base_matrix = np.empty(shape=matrix.shape[:-1], dtype=object) + for index in np.ndindex(matrix.shape[:-1]): + base_matrix[index] = import_jackknife(matrix[index], name, [idl]) + return base_matrix + + def _imp_from_jack_c(matrix, name, idl): + base_matrix = np.empty(shape=matrix.shape[:-1], dtype=object) + for index in np.ndindex(matrix.shape[:-1]): + base_matrix[index] = CObs(import_jackknife(matrix[index].real, name, [idl]), + import_jackknife(matrix[index].imag, name, [idl])) + return base_matrix + + for op in operands: + if isinstance(op.flat[0], CObs): + name = op.flat[0].real.names[0] + idl = op.flat[0].real.idl[name] + break + elif isinstance(op.flat[0], Obs): + name = op.flat[0].names[0] + idl = op.flat[0].idl[name] + break + + conv_operands = [] + for op in operands: + if isinstance(op.flat[0], CObs): + conv_operands.append(_exp_to_jack_c(op)) + elif isinstance(op.flat[0], Obs): + conv_operands.append(_exp_to_jack(op)) + else: + conv_operands.append(op) + + tmp_subscripts = ','.join([o + '...' for o in subscripts.split(',')]) + extended_subscripts = '->'.join([o + '...' for o in tmp_subscripts.split('->')[:-1]] + [tmp_subscripts.split('->')[-1]]) + einsum_path = np.einsum_path(extended_subscripts, *conv_operands, optimize='optimal')[0] + jack_einsum = np.einsum(extended_subscripts, *conv_operands, optimize=einsum_path) + + if jack_einsum.dtype == complex: + result = _imp_from_jack_c(jack_einsum, name, idl) + elif jack_einsum.dtype == float: + result = _imp_from_jack(jack_einsum, name, idl) + else: + raise Exception("Result has unexpected datatype") + + if result.shape == (): + return result.flat[0] + else: + return result def inv(x): @@ -179,7 +204,12 @@ def cholesky(x): return _mat_mat_op(anp.linalg.cholesky, x) -def scalar_mat_op(op, obs, **kwargs): +def det(x): + """Determinant of Obs valued matrices.""" + return _scalar_mat_op(anp.linalg.det, x) + + +def _scalar_mat_op(op, obs, **kwargs): """Computes the matrix to scalar operation op to a given matrix of Obs.""" def _mat(x, **kwargs): dim = int(np.sqrt(len(x))) @@ -221,7 +251,7 @@ def _mat_mat_op(op, obs, **kwargs): if kwargs.get('num_grad') is True: op_big_matrix = _num_diff_mat_mat_op(op, big_matrix, **kwargs) else: - op_big_matrix = derived_array(lambda x, **kwargs: op(x), [big_matrix])[0] + op_big_matrix = derived_observable(lambda x, **kwargs: op(x), [big_matrix], array_mode=True)[0] dim = op_big_matrix.shape[0] op_A = op_big_matrix[0: dim // 2, 0: dim // 2] op_B = op_big_matrix[dim // 2:, 0: dim // 2] @@ -232,7 +262,7 @@ def _mat_mat_op(op, obs, **kwargs): else: if kwargs.get('num_grad') is True: return _num_diff_mat_mat_op(op, obs, **kwargs) - return derived_array(lambda x, **kwargs: op(x), [obs])[0] + return derived_observable(lambda x, **kwargs: op(x), [obs], array_mode=True)[0] def eigh(obs, **kwargs): @@ -271,31 +301,6 @@ def svd(obs, **kwargs): return (u, s, vh) -def slogdet(obs, **kwargs): - """Computes the determinant of a matrix of Obs via np.linalg.slogdet.""" - def _mat(x): - dim = 
int(np.sqrt(len(x))) - if np.sqrt(len(x)) != dim: - raise Exception('Input has to have dim**2 entries') - - mat = [] - for i in range(dim): - row = [] - for j in range(dim): - row.append(x[j + dim * i]) - mat.append(row) - - (sign, logdet) = anp.linalg.slogdet(np.array(mat)) - return sign * anp.exp(logdet) - - if isinstance(obs, np.ndarray): - return derived_observable(_mat, (1 * (obs.ravel())).tolist(), **kwargs) - elif isinstance(obs, list): - return derived_observable(_mat, obs, **kwargs) - else: - raise TypeError('Unproper type of input.') - - # Variants for numerical differentiation def _num_diff_mat_mat_op(op, obs, **kwargs): @@ -500,51 +505,3 @@ def _num_diff_svd(obs, **kwargs): res_mat2.append(row) return (np.array(res_mat0) @ np.identity(mid_index), np.array(res_mat1) @ np.identity(mid_index), np.array(res_mat2) @ np.identity(shape[1])) - - -# This code block is directly taken from the current master branch of autograd and remains -# only until the new version is released on PyPi -_dot = partial(anp.einsum, '...ij,...jk->...ik') - - -# batched diag -def _diag(a): - return anp.eye(a.shape[-1]) * a - - -# batched diagonal, similar to matrix_diag in tensorflow -def _matrix_diag(a): - reps = anp.array(a.shape) - reps[:-1] = 1 - reps[-1] = a.shape[-1] - newshape = list(a.shape) + [a.shape[-1]] - return _diag(anp.tile(a, reps).reshape(newshape)) - -# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77) -# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete - - -def grad_eig(ans, x): - """Gradient of a general square (complex valued) matrix""" - e, u = ans # eigenvalues as 1d array, eigenvectors in columns - n = e.shape[-1] - - def vjp(g): - ge, gu = g - ge = _matrix_diag(ge) - f = 1 / (e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20) - f -= _diag(f) - ut = anp.swapaxes(u, -1, -2) - r1 = f * _dot(ut, gu) - r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n))) - r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut) - if not anp.iscomplexobj(x): - r = anp.real(r) - # the derivative is still complex for real input (imaginary delta is allowed), real output - # but the derivative should be real in real input case when imaginary delta is forbidden - return r - return vjp - - -defvjp(anp.linalg.eig, grad_eig) -# End of the code block from autograd.master diff --git a/pyerrors/misc.py b/pyerrors/misc.py index 1baa5e53..e3bfbc33 100644 --- a/pyerrors/misc.py +++ b/pyerrors/misc.py @@ -1,18 +1,56 @@ +import pickle import numpy as np from .obs import Obs +def dump_object(obj, name, **kwargs): + """Dump object into pickle file. + + Parameters + ---------- + obj : object + object to be saved in the pickle file + name : str + name of the file + path : str + specifies a custom path for the file (default '.') + """ + if 'path' in kwargs: + file_name = kwargs.get('path') + '/' + name + '.p' + else: + file_name = name + '.p' + with open(file_name, 'wb') as fb: + pickle.dump(obj, fb) + + +def load_object(path): + """Load object from pickle file. + + Parameters + ---------- + path : str + path to the file + """ + with open(path, 'rb') as file: + return pickle.load(file) + + def gen_correlated_data(means, cov, name, tau=0.5, samples=1000): """ Generate observables with given covariance and autocorrelation times. Parameters ---------- - means -- list containing the mean value of each observable. - cov -- covariance matrix for the data to be geneated. - name -- ensemble name for the data to be geneated. 
- tau -- can either be a real number or a list with an entry for - every dataset. - samples -- number of samples to be generated for each observable. + means : list + list containing the mean value of each observable. + cov : numpy.ndarray + covariance matrix for the data to be generated. + name : str + ensemble name for the data to be generated. + tau : float or list + can either be a real number or a list with an entry for + every dataset. + samples : int + number of samples to be generated for each observable. """ assert len(means) == cov.shape[-1] diff --git a/pyerrors/npr.py b/pyerrors/npr.py deleted file mode 100644 index 013e0d25..00000000 --- a/pyerrors/npr.py +++ /dev/null @@ -1,108 +0,0 @@ -import warnings -import numpy as np -from .linalg import inv, matmul -from .dirac import gamma, gamma5 - - -L = None -T = None - - -class Npr_matrix(np.ndarray): - - def __new__(cls, input_array, mom_in=None, mom_out=None): - obj = np.asarray(input_array).view(cls) - obj.mom_in = mom_in - obj.mom_out = mom_out - return obj - - @property - def g5H(self): - """Gamma_5 hermitean conjugate - - Returns gamma_5 @ M.T.conj() @ gamma_5 and exchanges in and out going - momenta. Works only for 12x12 matrices. - """ - if self.shape != (12, 12): - raise Exception('g5H only works for 12x12 matrices.') - extended_g5 = np.kron(np.eye(3, dtype=int), gamma5) - return Npr_matrix(matmul(extended_g5, self.conj().T, extended_g5), - mom_in=self.mom_out, - mom_out=self.mom_in) - - def _propagate_mom(self, other, name): - s_mom = getattr(self, name, None) - o_mom = getattr(other, name, None) - if s_mom is not None and o_mom is not None: - if not np.allclose(s_mom, o_mom): - raise Exception(name + ' does not match.') - return o_mom if o_mom is not None else s_mom - - def __matmul__(self, other): - return self.__new__(Npr_matrix, - super().__matmul__(other), - self._propagate_mom(other, 'mom_in'), - self._propagate_mom(other, 'mom_out')) - - def __array_finalize__(self, obj): - if obj is None: - return - self.mom_in = getattr(obj, 'mom_in', None) - self.mom_out = getattr(obj, 'mom_out', None) - - -def _check_geometry(): - if L is None: - raise Exception("Spatial extent 'L' not set.") - else: - if not isinstance(L, int): - raise Exception("Spatial extent 'L' must be an integer.") - if T is None: - raise Exception("Temporal extent 'T' not set.") - if not isinstance(T, int): - raise Exception("Temporal extent 'T' must be an integer.") - - -def inv_propagator(prop): - """ Inverts a 12x12 quark propagator""" - if prop.shape != (12, 12): - raise Exception("Only 12x12 propagators can be inverted.") - return Npr_matrix(inv(prop), prop.mom_in) - - -def Zq(inv_prop, fermion='Wilson'): - """ Calculates the quark field renormalization constant Zq - - Parameters - ---------- - inv_prop : array - Inverted 12x12 quark propagator - fermion : str - Fermion type for which the tree-level propagator is used - in the calculation of Zq. Default Wilson.
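Returning to the reworked misc module above, a hypothetical usage sketch (function names as defined in pyerrors/misc.py):
```python
import numpy as np
from pyerrors import misc

means = [0.5, 1.5]
cov = np.array([[0.02, 0.005], [0.005, 0.03]])

# Two correlated observables with autocorrelation times 2.0 and 4.0
obs_list = misc.gen_correlated_data(means, cov, 'ens1', tau=[2.0, 4.0], samples=5000)

misc.dump_object(obs_list, 'correlated_data')     # writes correlated_data.p
restored = misc.load_object('correlated_data.p')
```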
- """ - _check_geometry() - mom = np.copy(inv_prop.mom_in) - mom[3] /= T / L - sin_mom = np.sin(2 * np.pi / L * mom) - - if fermion == 'Wilson': - p_slash = -1j * (sin_mom[0] * gamma[0] + sin_mom[1] * gamma[1] + sin_mom[2] * gamma[2] + sin_mom[3] * gamma[3]) / np.sum(sin_mom ** 2) - elif fermion == 'Continuum': - p_mom = 2 * np.pi / L * mom - p_slash = -1j * (p_mom[0] * gamma[0] + p_mom[1] * gamma[1] + p_mom[2] * gamma[2] + p_mom[3] * gamma[3]) / np.sum(p_mom ** 2) - elif fermion == 'DWF': - W = np.sum(1 - np.cos(2 * np.pi / L * mom)) - s2 = np.sum(sin_mom ** 2) - p_slash = -1j * (sin_mom[0] * gamma[0] + sin_mom[1] * gamma[1] + sin_mom[2] * gamma[2] + sin_mom[3] * gamma[3]) - p_slash /= 2 * (W - 1 + np.sqrt((1 - W) ** 2 + s2)) - else: - raise Exception("Fermion type '" + fermion + "' not implemented") - - res = 1 / 12. * np.trace(matmul(inv_prop, np.kron(np.eye(3, dtype=int), p_slash))) - res.gamma_method() - - if not res.imag.is_zero_within_error(5): - warnings.warn("Imaginary part of Zq is not zero within 5 sigma") - return res - return res.real diff --git a/pyerrors/obs.py b/pyerrors/obs.py index 4fbda90f..edef264e 100644 --- a/pyerrors/obs.py +++ b/pyerrors/obs.py @@ -6,6 +6,7 @@ from autograd import jacobian import matplotlib.pyplot as plt import numdifftools as nd from itertools import groupby +from .covobs import Covobs class Obs: @@ -41,7 +42,7 @@ class Obs: 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', - 'idl', 'is_merged', 'tag', '__dict__'] + 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] S_global = 2.0 S_dict = {} @@ -67,16 +68,21 @@ class Obs: already subtracted from the samples """ - if means is None: + if means is None and len(samples): if len(samples) != len(names): raise Exception('Length of samples and names incompatible.') if idl is not None: if len(idl) != len(names): raise Exception('Length of idl incompatible with samples and names.') - if len(names) != len(set(names)): - raise Exception('names are not unique.') - if not all(isinstance(x, str) for x in names): - raise TypeError('All names have to be strings.') + name_length = len(names) + if name_length > 1: + if name_length != len(set(names)): + raise Exception('names are not unique.') + if not all(isinstance(x, str) for x in names): + raise TypeError('All names have to be strings.') + else: + if not isinstance(names[0], str): + raise TypeError('All names have to be strings.') if min(len(x) for x in samples) <= 4: raise Exception('Samples have to have at least 5 entries.') @@ -84,48 +90,53 @@ class Obs: self.shape = {} self.r_values = {} self.deltas = {} + self._covobs = {} self.idl = {} - if idl is not None: - for name, idx in sorted(zip(names, idl)): - if isinstance(idx, range): - self.idl[name] = idx - elif isinstance(idx, (list, np.ndarray)): - dc = np.unique(np.diff(idx)) - if np.any(dc < 0): - raise Exception("Unsorted idx for idl[%s]" % (name)) - if len(dc) == 1: - self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) + if len(samples): + if idl is not None: + for name, idx in sorted(zip(names, idl)): + if isinstance(idx, range): + self.idl[name] = idx + elif isinstance(idx, (list, np.ndarray)): + dc = np.unique(np.diff(idx)) + if np.any(dc < 0): + raise Exception("Unsorted idx for idl[%s]" % (name)) + if len(dc) == 1: + self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) + else: + self.idl[name] = list(idx) else: - self.idl[name] = list(idx) - else: - raise Exception('incompatible 
type for idl[%s].' % (name)) - else: - for name, sample in sorted(zip(names, samples)): - self.idl[name] = range(1, len(sample) + 1) + raise Exception('incompatible type for idl[%s].' % (name)) + else: + for name, sample in sorted(zip(names, samples)): + self.idl[name] = range(1, len(sample) + 1) - if means is not None: - for name, sample, mean in sorted(zip(names, samples, means)): - self.shape[name] = len(self.idl[name]) - if len(sample) != self.shape[name]: - raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) - self.r_values[name] = mean - self.deltas[name] = sample - else: - for name, sample in sorted(zip(names, samples)): - self.shape[name] = len(self.idl[name]) - if len(sample) != self.shape[name]: - raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) - self.r_values[name] = np.mean(sample) - self.deltas[name] = sample - self.r_values[name] - self.is_merged = False - self.N = sum(list(self.shape.values())) + self._value = 0 + self.N = 0 + if means is not None: + for name, sample, mean in sorted(zip(names, samples, means)): + self.shape[name] = len(self.idl[name]) + self.N += self.shape[name] + self.r_values[name] = mean + self.deltas[name] = sample + else: + for name, sample in sorted(zip(names, samples)): + self.shape[name] = len(self.idl[name]) + self.N += self.shape[name] + if len(sample) != self.shape[name]: + raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) + self.r_values[name] = np.mean(sample) + self.deltas[name] = sample - self.r_values[name] + self._value += self.shape[name] * self.r_values[name] + self._value /= self.N - self._value = 0 - if means is None: - for name in self.names: - self._value += self.shape[name] * self.r_values[name] - self._value /= self.N + self.is_merged = {} + + else: + self._value = 0 + self.is_merged = {} + self.N = 0 self._dvalue = 0.0 self.ddvalue = 0.0 @@ -145,6 +156,14 @@ class Obs: def e_names(self): return sorted(set([o.split('|')[0] for o in self.names])) + @property + def cov_names(self): + return sorted(set([o for o in self.covobs.keys()])) + + @property + def mc_names(self): + return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) + @property def e_content(self): res = {} @@ -154,21 +173,26 @@ class Obs: res[e_name].append(e_name) return res + @property + def covobs(self): + return self._covobs + def gamma_method(self, **kwargs): - """Calculate the error and related properties of the Obs. + """Estimate the error and related properties of the Obs. Parameters ---------- S : float - specifies a custom value for the parameter S (default 2.0), can be - a float or an array of floats for different ensembles + specifies a custom value for the parameter S (default 2.0). + If set to 0 it is assumed that the data exhibits no + autocorrelation. In this case the error estimate coincides + with the sample standard error. tau_exp : float positive value triggers the critical slowing down analysis - (default 0.0), can be a float or an array of floats for different - ensembles + (default 0.0). N_sigma : float number of standard deviations from zero until the tail is - attached to the autocorrelation function (default 1) + attached to the autocorrelation function (default 1).
fft : bool determines whether the fft algorithm is used for the computation of the autocorrelation function (default True) @@ -218,8 +242,7 @@ class Obs: _parse_kwarg('tau_exp') _parse_kwarg('N_sigma') - for e, e_name in enumerate(self.e_names): - + for e, e_name in enumerate(self.mc_names): r_length = [] for r_name in e_content[e_name]: if isinstance(self.idl[r_name], range): @@ -281,28 +304,40 @@ class Obs: self.e_windowsize[e_name] = n break else: - # Standard automatic windowing procedure - g_w = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) - g_w = np.exp(- np.arange(1, w_max) / g_w) - g_w / np.sqrt(np.arange(1, w_max) * e_N) - for n in range(1, w_max): - if n < w_max // 2 - 2: - _compute_drho(n + 1) - if g_w[n - 1] < 0 or n >= w_max - 1: - self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) - self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] - self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) - self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) - self.e_windowsize[e_name] = n - break + if self.S[e_name] == 0.0: + self.e_tauint[e_name] = 0.5 + self.e_dtauint[e_name] = 0.0 + self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) + self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) + self.e_windowsize[e_name] = 0 + else: + # Standard automatic windowing procedure + tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) + g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) + for n in range(1, w_max): + if n < w_max // 2 - 2: + _compute_drho(n + 1) + if g_w[n - 1] < 0 or n >= w_max - 1: + self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. 
(49) + self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] + self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) + self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) + self.e_windowsize[e_name] = n + break self._dvalue += self.e_dvalue[e_name] ** 2 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 - self._dvalue = np.sqrt(self.dvalue) + for e_name in self.cov_names: + self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) + self.e_ddvalue[e_name] = 0 + self._dvalue += self.e_dvalue[e_name]**2 + + self._dvalue = np.sqrt(self._dvalue) if self._dvalue == 0.0: self.ddvalue = 0.0 else: - self.ddvalue = np.sqrt(self.ddvalue) / self.dvalue + self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue return def _calc_gamma(self, deltas, idx, shape, w_max, fft): @@ -348,21 +383,25 @@ class Obs: """ if self.tag is not None: print("Description:", self.tag) - if self.value == 0.0: - percentage = np.nan + if not hasattr(self, 'e_dvalue'): + print('Result\t %3.8e' % (self.value)) else: - percentage = np.abs(self.dvalue / self.value) * 100 - print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue, self.ddvalue, percentage)) - if hasattr(self, 'e_dvalue'): + if self.value == 0.0: + percentage = np.nan + else: + percentage = np.abs(self._dvalue / self.value) * 100 + print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) if len(self.e_names) > 1: print(' Ensemble errors:') - for e_name in self.e_names: + for e_name in self.mc_names: if len(self.e_names) > 1: print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) if self.tau_exp[e_name] > 0: print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) else: print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) + for e_name in self.cov_names: + print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) if ens_content is True: if len(self.e_names) == 1: print(self.N, 'samples in', len(self.e_names), 'ensemble:') @@ -370,25 +409,28 @@ class Obs: print(self.N, 'samples in', len(self.e_names), 'ensembles:') my_string_list = [] for key, value in sorted(self.e_content.items()): - my_string = ' ' + "\u00B7 Ensemble '" + key + "' " - if len(value) == 1: - my_string += f': {self.shape[value[0]]} configurations' - if isinstance(self.idl[value[0]], range): - my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' - else: - my_string += ' (irregular range)' - else: - sublist = [] - for v in value: - my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " - my_substring += f': {self.shape[v]} configurations' - if isinstance(self.idl[v], range): - my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' + if key not in self.covobs: + my_string = ' ' + "\u00B7 Ensemble '" + key + "' " + if len(value) == 1: + my_string += f': {self.shape[value[0]]} configurations' + if isinstance(self.idl[value[0]], range): + my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' else: - my_substring += ' (irregular range)' - 
sublist.append(my_substring) + my_string += '\n' + '\n'.join(sublist) + else: + my_string = ' ' + "\u00B7 Covobs '" + key + "' " my_string_list.append(my_string) print('\n'.join(my_string_list)) @@ -406,11 +448,19 @@ class Obs: Works only properly when the gamma method was run. """ - return self.is_zero() or np.abs(self.value) <= sigma * self.dvalue + return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue - def is_zero(self): - """Checks whether the observable is zero within machine precision.""" - return np.isclose(0.0, self.value) and all(np.allclose(0.0, delta) for delta in self.deltas.values()) + def is_zero(self, rtol=1.e-5, atol=1.e-8): + """Checks whether the observable is zero within a given tolerance. + + Parameters + ---------- + rtol : float + Relative tolerance (for details see numpy documentation). + atol : float + Absolute tolerance (for details see numpy documentation). + """ + return np.isclose(0.0, self.value, rtol, atol) and all(np.allclose(0.0, delta, rtol, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), rtol, atol) for delta in self.covobs.values()) def plot_tauint(self, save=None): """Plot integrated autocorrelation time for each ensemble. @@ -420,11 +470,11 @@ class Obs: save : str saves the figure to a file named 'save' if specified. """ - if not hasattr(self, 'e_names'): + if not hasattr(self, 'e_dvalue'): raise Exception('Run the gamma method first.') fig = plt.figure() - for e, e_name in enumerate(self.e_names): + for e, e_name in enumerate(self.mc_names): plt.xlabel(r'$W$') plt.ylabel(r'$\tau_\mathrm{int}$') length = int(len(self.e_n_tauint[e_name])) @@ -453,9 +503,9 @@ class Obs: def plot_rho(self): """Plot normalized autocorrelation function for each ensemble.""" - if not hasattr(self, 'e_names'): + if not hasattr(self, 'e_dvalue'): raise Exception('Run the gamma method first.') - for e, e_name in enumerate(self.e_names): + for e, e_name in enumerate(self.mc_names): plt.xlabel('W') plt.ylabel('rho') length = int(len(self.e_drho[e_name])) @@ -475,9 +525,9 @@ class Obs: def plot_rep_dist(self): """Plot replica distribution for each ensemble with more than one replicum.""" - if not hasattr(self, 'e_names'): + if not hasattr(self, 'e_dvalue'): raise Exception('Run the gamma method first.') - for e, e_name in enumerate(self.e_names): + for e, e_name in enumerate(self.mc_names): if len(self.e_content[e_name]) == 1: print('No replica distribution for a single replicum (', e_name, ')') continue @@ -503,16 +553,13 @@ class Obs: expand : bool show expanded history for irregular Monte Carlo chains (default: True).
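The new S=0 branch of gamma_method shown above can be exercised like this (illustrative sketch on synthetic data):
```python
import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.normal(1.0, 0.5, 1000)], ['ens1'])

obs.gamma_method()     # default automatic windowing (S=2.0)
obs.details()          # renamed from Obs.print in this release

obs.gamma_method(S=0)  # assumes no autocorrelation: plain standard error
print(obs.dvalue)
```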
""" - if not hasattr(self, 'e_names'): - raise Exception('Run the gamma method first.') - - for e, e_name in enumerate(self.e_names): + for e, e_name in enumerate(self.mc_names): plt.figure() r_length = [] tmp = [] for r, r_name in enumerate(self.e_content[e_name]): if expand: - tmp.append(_expand_deltas(self.deltas[r_name], self.idl[r_name], self.shape[r_name]) + self.r_values[r_name]) + tmp.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) else: tmp.append(self.deltas[r_name] + self.r_values[r_name]) r_length.append(len(tmp[-1])) @@ -527,12 +574,12 @@ class Obs: def plot_piechart(self): """Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.""" - if not hasattr(self, 'e_names'): + if not hasattr(self, 'e_dvalue'): raise Exception('Run the gamma method first.') - if self.dvalue == 0.0: + if self._dvalue == 0.0: raise Exception('Error is 0.0') labels = self.e_names - sizes = [i ** 2 for i in list(self.e_dvalue.values())] / self.dvalue ** 2 + sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 fig1, ax1 = plt.subplots() ax1.pie(sizes, labels=labels, startangle=90, normalize=True) ax1.axis('equal') @@ -577,12 +624,10 @@ class Obs: name = self.names[0] full_data = self.deltas[name] + self.r_values[name] n = full_data.size - mean = np.mean(full_data) + mean = self.value tmp_jacks = np.zeros(n + 1) - tmp_jacks[0] = self.value - for i in range(n): - tmp_jacks[i + 1] = (n * mean - full_data[i]) / (n - 1) - + tmp_jacks[0] = mean + tmp_jacks[1:] = (n * mean - full_data) / (n - 1) return tmp_jacks def __float__(self): @@ -592,15 +637,15 @@ class Obs: return 'Obs[' + str(self) + ']' def __str__(self): - if self.dvalue == 0.0: + if self._dvalue == 0.0: return str(self.value) - fexp = np.floor(np.log10(self.dvalue)) + fexp = np.floor(np.log10(self._dvalue)) if fexp < 0.0: - return '{:{form}}({:2.0f})'.format(self.value, self.dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') + return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') elif fexp == 0.0: - return '{:.1f}({:1.1f})'.format(self.value, self.dvalue) + return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) else: - return '{:.0f}({:2.0f})'.format(self.value, self.dvalue) + return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) # Overload comparisons def __lt__(self, other): @@ -902,7 +947,7 @@ def _merge_idx(idl): g = groupby(idl) if next(g, True) and not next(g, False): return idl[0] - except: + except Exception: pass if np.all([type(idx) is range for idx in idl]): @@ -942,44 +987,36 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx): return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) -def _filter_zeroes(names, deltas, idl, eps=Obs.filter_eps): +def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): """Filter out all configurations with vanishing fluctuation such that they do not - contribute to the error estimate anymore. Returns the new names, deltas and - idl according to the filtering. + contribute to the error estimate anymore. Returns the new deltas and + idx according to the filtering. A fluctuation is considered to be vanishing, if it is smaller than eps times the mean of the absolute values of all deltas in one list. 
Parameters ---------- - names : list - List of names - deltas : dict - Dict lists of fluctuations - idx : dict - Dict of lists or ranges of configs on which the deltas are defined. - Has to be a subset of new_idx. + deltas : list + List of fluctuations + idx : list + List or ranges of configs on which the deltas are defined. eps : float Prefactor that enters the filter criterion. """ - new_names = [] - new_deltas = {} - new_idl = {} - for name in names: - nd = [] - ni = [] - maxd = np.mean(np.fabs(deltas[name])) - for i in range(len(deltas[name])): - if not np.isclose(0.0, deltas[name][i], atol=eps * maxd): - nd.append(deltas[name][i]) - ni.append(idl[name][i]) - if nd: - new_names.append(name) - new_deltas[name] = np.array(nd) - new_idl[name] = ni - return (new_names, new_deltas, new_idl) + new_deltas = [] + new_idx = [] + maxd = np.mean(np.fabs(deltas)) + for i in range(len(deltas)): + if abs(deltas[i]) > eps * maxd: + new_deltas.append(deltas[i]) + new_idx.append(idx[i]) + if new_idx: + return np.array(new_deltas), new_idx + else: + return deltas, idx -def derived_observable(func, data, **kwargs): +def derived_observable(func, data, array_mode=False, **kwargs): """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. Parameters @@ -1012,32 +1049,27 @@ def derived_observable(func, data, **kwargs): raveled_data = data.ravel() # Workaround for matrix operations containing non Obs data - for i_data in raveled_data: - if isinstance(i_data, Obs): - first_name = i_data.names[0] - first_shape = i_data.shape[first_name] - first_idl = i_data.idl[first_name] - break + if not all(isinstance(x, Obs) for x in raveled_data): + for i in range(len(raveled_data)): + if isinstance(raveled_data[i], (int, float)): + raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") - for i in range(len(raveled_data)): - if isinstance(raveled_data[i], (int, float)): - raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl]) + allcov = {} + for o in raveled_data: + for name in o.cov_names: + if name in allcov: + if not np.allclose(allcov[name], o.covobs[name].cov): + raise Exception('Inconsistent covariance matrices for %s!' 
% (name)) + else: + allcov[name] = o.covobs[name].cov n_obs = len(raveled_data) new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) + new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) + new_sample_names = sorted(set(new_names) - set(new_cov_names)) - is_merged = len(list(filter(lambda o: o.is_merged is True, raveled_data))) > 0 + is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 - new_idl_d = {} - for name in new_names: - idl = [] - for i_data in raveled_data: - tmp = i_data.idl.get(name) - if tmp is not None: - idl.append(tmp) - new_idl_d[name] = _merge_idx(idl) - if not is_merged: - is_merged = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) if data.ndim == 1: values = np.array([o.value for o in data]) @@ -1046,21 +1078,24 @@ def derived_observable(func, data, **kwargs): new_values = func(values, **kwargs) - multi = 0 - if isinstance(new_values, np.ndarray): - multi = 1 + multi = int(isinstance(new_values, np.ndarray)) new_r_values = {} - for name in new_names: + new_idl_d = {} + for name in new_sample_names: + idl = [] tmp_values = np.zeros(n_obs) for i, item in enumerate(raveled_data): - tmp = item.r_values.get(name) - if tmp is None: - tmp = item.value - tmp_values[i] = tmp + tmp_values[i] = item.r_values.get(name, item.value) + tmp_idl = item.idl.get(name) + if tmp_idl is not None: + idl.append(tmp_idl) if multi > 0: tmp_values = np.array(tmp_values).reshape(data.shape) new_r_values[name] = func(tmp_values, **kwargs) + new_idl_d[name] = _merge_idx(idl) + if not is_merged[name]: + is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) if 'man_grad' in kwargs: deriv = np.asarray(kwargs.get('man_grad')) @@ -1093,26 +1128,71 @@ def derived_observable(func, data, **kwargs): final_result = np.zeros(new_values.shape, dtype=object) + if array_mode is True: + + class _Zero_grad(): + def __init__(self, N): + self.grad = np.zeros((N, 1)) + + new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) + d_extracted = {} + g_extracted = {} + for name in new_sample_names: + d_extracted[name] = [] + ens_length = len(new_idl_d[name]) + for i_dat, dat in enumerate(data): + d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) + for name in new_cov_names: + g_extracted[name] = [] + zero_grad = _Zero_grad(new_covobs_lengths[name]) + for i_dat, dat in enumerate(data): + g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) + for i_val, new_val in np.ndenumerate(new_values): new_deltas = {} - for j_obs, obs in np.ndenumerate(data): - for name in obs.names: - new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) + new_grad = {} + if array_mode is True: + for name in new_sample_names: + ens_length = d_extracted[name][0].shape[-1] + new_deltas[name] = np.zeros(ens_length) + for i_dat, dat in enumerate(d_extracted[name]): + new_deltas[name] += 
np.tensordot(deriv[i_val + (i_dat, )], dat) + for name in new_cov_names: + new_grad[name] = 0 + for i_dat, dat in enumerate(g_extracted[name]): + new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) + else: + for j_obs, obs in np.ndenumerate(data): + for name in obs.names: + if name in obs.cov_names: + new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad + else: + new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) + new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} + + if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): + raise Exception('The same name has been used for deltas and covobs!') new_samples = [] new_means = [] new_idl = [] - if is_merged: - filtered_names, filtered_deltas, filtered_idl_d = _filter_zeroes(new_names, new_deltas, new_idl_d) - else: - filtered_names = new_names - filtered_deltas = new_deltas - filtered_idl_d = new_idl_d - for name in filtered_names: - new_samples.append(filtered_deltas[name]) - new_means.append(new_r_values[name][i_val]) - new_idl.append(filtered_idl_d[name]) - final_result[i_val] = Obs(new_samples, filtered_names, means=new_means, idl=new_idl) + new_names_obs = [] + for name in new_names: + if name not in new_covobs: + if is_merged[name]: + filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) + else: + filtered_deltas = new_deltas[name] + filtered_idl_d = new_idl_d[name] + + new_samples.append(filtered_deltas) + new_idl.append(filtered_idl_d) + new_means.append(new_r_values[name][i_val]) + new_names_obs.append(name) + final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) + for name in new_covobs: + final_result[i_val].names.append(name) + final_result[i_val]._covobs = new_covobs final_result[i_val]._value = new_val final_result[i_val].is_merged = is_merged final_result[i_val].reweighted = reweighted @@ -1175,22 +1255,24 @@ def reweight(weight, obs, **kwargs): """ result = [] for i in range(len(obs)): - if sorted(weight.names) != sorted(obs[i].names): + if len(obs[i].cov_names): + raise Exception('Error: Not possible to reweight an Obs that contains covobs!') + if not set(obs[i].names).issubset(weight.names): raise Exception('Error: Ensembles do not fit') - for name in weight.names: + for name in obs[i].names: if not set(obs[i].idl[name]).issubset(weight.idl[name]): raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' 
% (i, name)) new_samples = [] w_deltas = {} - for name in sorted(weight.names): + for name in sorted(obs[i].names): w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) - tmp_obs = Obs(new_samples, sorted(weight.names), idl=[obs[i].idl[name] for name in sorted(weight.names)]) + tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) if kwargs.get('all_configs'): new_weight = weight else: - new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(weight.names)], sorted(weight.names), idl=[obs[i].idl[name] for name in sorted(weight.names)]) + new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) result[-1].reweighted = True @@ -1216,6 +1298,8 @@ def correlate(obs_a, obs_b): if sorted(obs_a.names) != sorted(obs_b.names): raise Exception('Ensembles do not fit') + if len(obs_a.cov_names) or len(obs_b.cov_names): + raise Exception('Error: Not possible to correlate Obs that contain covobs!') for name in obs_a.names: if obs_a.shape[name] != obs_b.shape[name]: raise Exception('Shapes of ensemble', name, 'do not fit') @@ -1234,7 +1318,7 @@ def correlate(obs_a, obs_b): new_idl.append(obs_a.idl[name]) o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) - o.is_merged = obs_a.is_merged or obs_b.is_merged + o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} o.reweighted = obs_a.reweighted or obs_b.reweighted return o @@ -1246,69 +1330,7 @@ def covariance(obs1, obs2, correlation=False, **kwargs): The gamma method has to be applied first to both observables. If abs(covariance(obs1, obs2)) > obs1.dvalue * obs2.dvalue, the covariance - is constrained to the maximum value in order to make sure that covariance - matrices are positive semidefinite. 
- - Parameters - ---------- - obs1 : Obs - First Obs - obs2 : Obs - Second Obs - correlation : bool - if true the correlation instead of the covariance is - returned (default False) - """ - - for name in sorted(set(obs1.names + obs2.names)): - if (obs1.shape.get(name) != obs2.shape.get(name)) and (obs1.shape.get(name) is not None) and (obs2.shape.get(name) is not None): - raise Exception('Shapes of ensemble', name, 'do not fit') - if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], _merge_idx([obs1.idl[name], obs2.idl[name]])]]))): - raise Exception('Shapes of ensemble', name, 'do not fit') - - if not hasattr(obs1, 'e_names') or not hasattr(obs2, 'e_names'): - raise Exception('The gamma method has to be applied to both Obs first.') - - dvalue = 0 - - for e_name in obs1.e_names: - - if e_name not in obs2.e_names: - continue - - gamma = 0 - r_length = [] - for r_name in obs1.e_content[e_name]: - if r_name not in obs2.e_content[e_name]: - continue - - r_length.append(len(obs1.deltas[r_name])) - - gamma += np.sum(obs1.deltas[r_name] * obs2.deltas[r_name]) - - e_N = np.sum(r_length) - - tau_combined = (obs1.e_tauint[e_name] + obs2.e_tauint[e_name]) / 2 - dvalue += gamma / e_N * (1 + 1 / e_N) / e_N * 2 * tau_combined - - if np.abs(dvalue / obs1.dvalue / obs2.dvalue) > 1.0: - dvalue = np.sign(dvalue) * obs1.dvalue * obs2.dvalue - - if correlation: - dvalue = dvalue / obs1.dvalue / obs2.dvalue - - return dvalue - - -def covariance2(obs1, obs2, correlation=False, **kwargs): - """Alternative implementation of the covariance of two observables. - - covariance(obs, obs) is equal to obs.dvalue ** 2 - The gamma method has to be applied first to both observables. - - If abs(covariance(obs1, obs2)) > obs1.dvalue * obs2.dvalue, the covariance - is constrained to the maximum value in order to make sure that covariance - matrices are positive semidefinite. + is constrained to the maximum value. Keyword arguments ----------------- @@ -1350,7 +1372,10 @@ def covariance2(obs1, obs2, correlation=False, **kwargs): return gamma - if not hasattr(obs1, 'e_names') or not hasattr(obs2, 'e_names'): + if set(obs1.names).isdisjoint(set(obs2.names)): + return 0. + + if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): raise Exception('The gamma method has to be applied to both Obs first.') dvalue = 0 @@ -1359,9 +1384,9 @@ def covariance2(obs1, obs2, correlation=False, **kwargs): e_n_tauint = {} e_rho = {} - for e_name in obs1.e_names: + for e_name in obs1.mc_names: - if e_name not in obs2.e_names: + if e_name not in obs2.mc_names: continue idl_d = {} @@ -1406,12 +1431,19 @@ def covariance2(obs1, obs2, correlation=False, **kwargs): # Make sure no entry of tauint is smaller than 0.5 e_n_tauint[e_name][e_n_tauint[e_name] < 0.5] = 0.500000000001 - window = max(obs1.e_windowsize[e_name], obs2.e_windowsize[e_name]) + window = min(obs1.e_windowsize[e_name], obs2.e_windowsize[e_name]) # Bias correction hep-lat/0306017 eq. 
(49) e_dvalue[e_name] = 2 * (e_n_tauint[e_name][window] + obs1.tau_exp[e_name] * np.abs(e_rho[e_name][window + 1])) * (1 + (2 * window + 1) / e_N) * e_gamma[e_name][0] / e_N dvalue += e_dvalue[e_name] + for e_name in obs1.cov_names: + + if e_name not in obs2.cov_names: + continue + + dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) + if np.abs(dvalue / obs1.dvalue / obs2.dvalue) > 1.0: dvalue = np.sign(dvalue) * obs1.dvalue * obs2.dvalue @@ -1421,70 +1453,6 @@ def covariance2(obs1, obs2, correlation=False, **kwargs): return dvalue -def covariance3(obs1, obs2, correlation=False, **kwargs): - """Another alternative implementation of the covariance of two observables. - - covariance2(obs, obs) is equal to obs.dvalue ** 2 - Currently only works if ensembles are identical. - The gamma method has to be applied first to both observables. - - If abs(covariance2(obs1, obs2)) > obs1.dvalue * obs2.dvalue, the covariance - is constrained to the maximum value in order to make sure that covariance - matrices are positive semidefinite. - - Keyword arguments - ----------------- - correlation -- if true the correlation instead of the covariance is - returned (default False) - plot -- if true, the integrated autocorrelation time for each ensemble is - plotted. - """ - - for name in sorted(set(obs1.names + obs2.names)): - if (obs1.shape.get(name) != obs2.shape.get(name)) and (obs1.shape.get(name) is not None) and (obs2.shape.get(name) is not None): - raise Exception('Shapes of ensemble', name, 'do not fit') - if (1 != len(set([len(idx) for idx in [obs1.idl[name], obs2.idl[name], _merge_idx([obs1.idl[name], obs2.idl[name]])]]))): - raise Exception('Shapes of ensemble', name, 'do not fit') - - if not hasattr(obs1, 'e_names') or not hasattr(obs2, 'e_names'): - raise Exception('The gamma method has to be applied to both Obs first.') - - tau_exp = [] - S = [] - for e_name in sorted(set(obs1.e_names + obs2.e_names)): - t_1 = obs1.tau_exp.get(e_name) - t_2 = obs2.tau_exp.get(e_name) - if t_1 is None: - t_1 = 0 - if t_2 is None: - t_2 = 0 - tau_exp.append(max(t_1, t_2)) - S_1 = obs1.S.get(e_name) - S_2 = obs2.S.get(e_name) - if S_1 is None: - S_1 = Obs.S_global - if S_2 is None: - S_2 = Obs.S_global - S.append(max(S_1, S_2)) - - check_obs = obs1 + obs2 - check_obs.gamma_method(tau_exp=tau_exp, S=S) - - if kwargs.get('plot'): - check_obs.plot_tauint() - check_obs.plot_rho() - - cov = (check_obs.dvalue ** 2 - obs1.dvalue ** 2 - obs2.dvalue ** 2) / 2 - - if np.abs(cov / obs1.dvalue / obs2.dvalue) > 1.0: - cov = np.sign(cov) * obs1.dvalue * obs2.dvalue - - if correlation: - cov = cov / obs1.dvalue / obs2.dvalue - - return cov - - def pseudo_Obs(value, dvalue, name, samples=1000): """Generate a pseudo Obs with given value, dvalue and name @@ -1517,36 +1485,23 @@ def pseudo_Obs(value, dvalue, name, samples=1000): return res -def dump_object(obj, name, **kwargs): - """Dump object into pickle file. +def import_jackknife(jacks, name, idl=None): + """Imports jackknife samples and returns an Obs Parameters ---------- - obj : object - object to be saved in the pickle file + jacks : numpy.ndarray + numpy array containing the mean value as zeroth entry and + the N jackknife samples as first to Nth entry. name : str - name of the file - path : str - specifies a custom path for the file (default '.') + name of the ensemble the samples are defined on. 
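+    idl : range or list, optional
+        configuration indices the samples are defined on; they are passed
+        on unchanged to the Obs constructor.
+
+    Examples
+    --------
+    Illustrative round trip via export_jackknife (variable names are
+    placeholders):
+
+    >>> my_obs = Obs([samples], ['ensemble'])
+    >>> jacks = my_obs.export_jackknife()
+    >>> reconstructed_obs = import_jackknife(jacks, 'ensemble')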
""" - if 'path' in kwargs: - file_name = kwargs.get('path') + '/' + name + '.p' - else: - file_name = name + '.p' - with open(file_name, 'wb') as fb: - pickle.dump(obj, fb) - - -def load_object(path): - """Load object from pickle file. - - Parameters - ---------- - path : str - path to the file - """ - with open(path, 'rb') as file: - return pickle.load(file) + length = len(jacks) - 1 + prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) + samples = jacks[1:] @ prj + new_obs = Obs([samples], [name], idl=idl) + new_obs._value = jacks[0] + return new_obs def merge_obs(list_of_obs): @@ -1562,6 +1517,8 @@ def merge_obs(list_of_obs): replist = [item for obs in list_of_obs for item in obs.names] if (len(replist) == len(set(replist))) is False: raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) + if any([len(o.cov_names) for o in list_of_obs]): + raise Exception('Not possible to merge data that contains covobs!') new_dict = {} idl_dict = {} for o in list_of_obs: @@ -1571,6 +1528,49 @@ def merge_obs(list_of_obs): names = sorted(new_dict.keys()) o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) - o.is_merged = np.any([oi.is_merged for oi in list_of_obs]) + o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) return o + + +def cov_Obs(means, cov, name, grad=None): + """Create an Obs based on mean(s) and a covariance matrix + + Parameters + ---------- + mean : list of floats or float + N mean value(s) of the new Obs + cov : list or array + 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance + name : str + identifier for the covariance matrix + grad : list or array + Gradient of the Covobs wrt. the means belonging to cov. + """ + + def covobs_to_obs(co): + """Make an Obs out of a Covobs + + Parameters + ---------- + co : Covobs + Covobs to be embedded into the Obs + """ + o = Obs([], []) + o._value = co.value + o.names.append(co.name) + o._covobs[co.name] = co + o._dvalue = np.sqrt(co.errsq()) + return o + + ol = [] + if isinstance(means, (float, int)): + means = [means] + + for i in range(len(means)): + ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) + if ol[0].covobs[name].N != len(means): + raise Exception('You have to provide %d mean values!' 
% (ol[0].N)) + if len(ol) == 1: + return ol[0] + return ol diff --git a/pyerrors/roots.py b/pyerrors/roots.py index a932fa8c..e26b44e7 100644 --- a/pyerrors/roots.py +++ b/pyerrors/roots.py @@ -1,6 +1,7 @@ +import numpy as np import scipy.optimize from autograd import jacobian -from .obs import derived_observable, pseudo_Obs +from .obs import derived_observable def find_root(d, func, guess=1.0, **kwargs): @@ -33,4 +34,5 @@ def find_root(d, func, guess=1.0, **kwargs): da = jacobian(lambda u, v: func(v, u))(d.value, root[0]) deriv = - da / dx - return derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(root, 0.0, d.names[0], d.shape[d.names[0]]), d], man_grad=[0, deriv]) + res = derived_observable(lambda x, **kwargs: (x[0] + np.finfo(np.float64).eps) / (d.value + np.finfo(np.float64).eps) * root[0], [d], man_grad=[deriv]) + return res diff --git a/setup.py b/setup.py index d632fdcb..5a971d92 100644 --- a/setup.py +++ b/setup.py @@ -9,5 +9,5 @@ setup(name='pyerrors', author_email='fabian.joswig@ed.ac.uk', packages=find_packages(), python_requires='>=3.6.0', - install_requires=['numpy>=1.16', 'autograd>=1.2', 'numdifftools', 'matplotlib>=3.3', 'scipy', 'iminuit<2', 'h5py'] + install_requires=['numpy>=1.16', 'autograd @ git+https://github.com/HIPS/autograd.git', 'numdifftools', 'matplotlib>=3.3', 'scipy', 'iminuit>=2', 'h5py'] ) diff --git a/tests/correlators_test.py b/tests/correlators_test.py index d2bb6a2c..6c3418f8 100644 --- a/tests/correlators_test.py +++ b/tests/correlators_test.py @@ -79,6 +79,26 @@ def test_T_symmetry(): T_symmetric = my_corr.T_symmetry(my_corr) +def test_fit_correlator(): + my_corr = pe.correlators.Corr([pe.pseudo_Obs(1.01324, 0.05, 't'), pe.pseudo_Obs(2.042345, 0.0004, 't')]) + + def f(a, x): + y = a[0] + a[1] * x + return y + + fit_res = my_corr.fit(f) + assert fit_res[0] == my_corr[0] + assert fit_res[1] == my_corr[1] - my_corr[0] + + +def test_plateau(): + my_corr = pe.correlators.Corr([pe.pseudo_Obs(1.01324, 0.05, 't'), pe.pseudo_Obs(1.042345, 0.008, 't')]) + + my_corr.plateau([0, 1], method="fit") + my_corr.plateau([0, 1], method="mean") + with pytest.raises(Exception): + my_corr.plateau() + def test_utility(): corr_content = [] for t in range(8): diff --git a/tests/covobs_test.py b/tests/covobs_test.py new file mode 100644 index 00000000..215c75d3 --- /dev/null +++ b/tests/covobs_test.py @@ -0,0 +1,94 @@ +import autograd.numpy as np +import pyerrors as pe +import pytest + +np.random.seed(0) + + +def test_covobs(): + val = 1.123124 + cov = .243423 + name = 'Covariance' + co = pe.cov_Obs(val, cov, name) + co.gamma_method() + co.details() + assert (co.dvalue == np.sqrt(cov)) + assert (co.value == val) + + do = 2 * co + assert (do.covobs[name].grad[0] == 2) + + do = co * co + assert (do.covobs[name].grad[0] == 2 * val) + assert np.array_equal(do.covobs[name].cov, co.covobs[name].cov) + + pi = [16.7457, -19.0475] + cov = [[3.49591, -6.07560], [-6.07560, 10.5834]] + + cl = pe.cov_Obs(pi, cov, 'rAP') + pl = pe.misc.gen_correlated_data(pi, np.asarray(cov), 'rAPpseudo') + + def rAP(p, g0sq): + return -0.0010666 * g0sq * (1 + np.exp(p[0] + p[1] / g0sq)) + + for g0sq in [1, 1.5, 1.8]: + oc = rAP(cl, g0sq) + oc.gamma_method() + op = rAP(pl, g0sq) + op.gamma_method() + assert(np.isclose(oc.value, op.value, rtol=1e-14, atol=1e-14)) + + [o.gamma_method() for o in cl] + assert(pe.covariance(cl[0], cl[1]) == cov[0][1]) + assert(pe.covariance(cl[0], cl[1]) == cov[1][0]) + + do = cl[0] * cl[1] + assert(np.array_equal(do.covobs['rAP'].grad, np.transpose([pi[1], 
pi[0]]).reshape(2, 1))) + + +def test_covobs_overloading(): + covobs = pe.cov_Obs([0.5, 0.5], np.array([[0.02, 0.02], [0.02, 0.02]]), 'test') + assert (covobs[0] / covobs[1]) == 1 + assert (covobs[0] - covobs[1]) == 0 + + my_obs = pe.pseudo_Obs(2.3, 0.2, 'obs') + + assert (my_obs * covobs[0] / covobs[1]) == my_obs + + covobs = pe.cov_Obs(0.0, 0.3, 'test') + assert not covobs.is_zero() + + +def test_covobs_name_collision(): + covobs = pe.cov_Obs(0.5, 0.002, 'test') + my_obs = pe.pseudo_Obs(2.3, 0.2, 'test') + with pytest.raises(Exception): + summed_obs = my_obs + covobs + covobs2 = pe.cov_Obs(0.3, 0.001, 'test') + with pytest.raises(Exception): + summed_obs = covobs + covobs2 + + +def test_covobs_replica_separator(): + with pytest.raises(Exception): + covobs = pe.cov_Obs(0.5, 0.002, 'test|r2') + + +def test_covobs_init(): + covobs = pe.cov_Obs(0.5, 0.002, 'test') + covobs = pe.cov_Obs([1, 2], [0.1, 0.2], 'test') + covobs = pe.cov_Obs([1, 2], np.array([0.1, 0.2]), 'test') + covobs = pe.cov_Obs([1, 2], [[0.1, 0.2], [0.1, 0.2]], 'test') + covobs = pe.cov_Obs([1, 2], np.array([[0.1, 0.2], [0.1, 0.2]]), 'test') + + + +def test_covobs_exceptions(): + with pytest.raises(Exception): + covobs = pe.cov_Obs(0.1, [[0.1, 0.2], [0.1, 0.2]], 'test') + with pytest.raises(Exception): + covobs = pe.cov_Obs(0.1, np.array([[0.1, 0.2], [0.1, 0.2]]), 'test') + with pytest.raises(Exception): + covobs = pe.cov_Obs([0.5, 0.1], np.array([[2, 1, 3], [1, 2, 3]]), 'test') + with pytest.raises(Exception): + covobs = pe.cov_Obs([0.5, 0.1], np.random.random((2, 2, 2)), 'test') diff --git a/tests/dirac_test.py b/tests/dirac_test.py index 28222da6..0a2c0379 100644 --- a/tests/dirac_test.py +++ b/tests/dirac_test.py @@ -10,3 +10,25 @@ def test_gamma_matrices(): assert np.allclose(matrix @ matrix, np.identity(4)) assert np.allclose(matrix, matrix.T.conj()) assert np.allclose(pe.dirac.gamma5, pe.dirac.gamma[0] @ pe.dirac.gamma[1] @ pe.dirac.gamma[2] @ pe.dirac.gamma[3]) + + +def test_grid_dirac(): + for gamma in ['Identity', + 'Gamma5', + 'GammaX', + 'GammaY', + 'GammaZ', + 'GammaT', + 'GammaXGamma5', + 'GammaYGamma5', + 'GammaZGamma5', + 'GammaTGamma5', + 'SigmaXT', + 'SigmaXY', + 'SigmaXZ', + 'SigmaYT', + 'SigmaYZ', + 'SigmaZT']: + pe.dirac.Grid_gamma(gamma) + with pytest.raises(Exception): + pe.dirac.Grid_gamma('Not a gamma matrix') diff --git a/tests/fits_test.py b/tests/fits_test.py index 69a58a33..8a9e0843 100644 --- a/tests/fits_test.py +++ b/tests/fits_test.py @@ -2,12 +2,34 @@ import autograd.numpy as np import math import scipy.optimize from scipy.odr import ODR, Model, RealData +from scipy.linalg import cholesky +from scipy.stats import norm import pyerrors as pe import pytest np.random.seed(0) +def test_fit_lin(): + x = [0, 2] + y = [pe.pseudo_Obs(0, 0.1, 'ensemble'), + pe.pseudo_Obs(2, 0.1, 'ensemble')] + + res = pe.fits.fit_lin(x, y) + + assert res[0] == y[0] + assert res[1] == (y[1] - y[0]) / (x[1] - x[0]) + + x = y = [pe.pseudo_Obs(0, 0.1, 'ensemble'), + pe.pseudo_Obs(2, 0.1, 'ensemble')] + + res = pe.fits.fit_lin(x, y) + + m = (y[1] - y[0]) / (x[1] - x[0]) + assert res[0] == y[1] - x[1] * m + assert res[1] == m + + def test_least_squares(): dim = 10 + int(30 * np.random.rand()) x = np.arange(dim) @@ -27,20 +49,86 @@ def test_least_squares(): y = a[0] * np.exp(-a[1] * x) return y - out = pe.least_squares(x, oy, func) + out = pe.least_squares(x, oy, func, expected_chisquare=True, resplot=True, qqplot=True) beta = out.fit_parameters + str(out) + repr(out) + len(out) + for i in range(2): 
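+        # compare value and error of each fit parameter to the
+        # scipy.optimize.curve_fit reference (popt, pcov) computed above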
beta[i].gamma_method(S=1.0) assert math.isclose(beta[i].value, popt[i], abs_tol=1e-5) assert math.isclose(pcov[i, i], beta[i].dvalue ** 2, abs_tol=1e-3) assert math.isclose(pe.covariance(beta[0], beta[1]), pcov[0, 1], abs_tol=1e-3) - pe.Obs.e_tag_global = 0 chi2_pyerrors = np.sum(((f(x, *[o.value for o in beta]) - y) / yerr) ** 2) / (len(x) - 2) chi2_scipy = np.sum(((f(x, *popt) - y) / yerr) ** 2) / (len(x) - 2) assert math.isclose(chi2_pyerrors, chi2_scipy, abs_tol=1e-10) + out = pe.least_squares(x, oy, func, const_par=[beta[1]]) + assert((out.fit_parameters[0] - beta[0]).is_zero()) + assert((out.fit_parameters[1] - beta[1]).is_zero()) + + oyc = [] + for i, item in enumerate(x): + oyc.append(pe.cov_Obs(y[i], yerr[i]**2, 'cov' + str(i))) + + outc = pe.least_squares(x, oyc, func) + betac = outc.fit_parameters + + for i in range(2): + betac[i].gamma_method(S=1.0) + assert math.isclose(betac[i].value, popt[i], abs_tol=1e-5) + assert math.isclose(pcov[i, i], betac[i].dvalue ** 2, abs_tol=1e-3) + assert math.isclose(pe.covariance(betac[0], betac[1]), pcov[0, 1], abs_tol=1e-3) + + +def test_correlated_fit(): + num_samples = 400 + N = 10 + + x = norm.rvs(size=(N, num_samples)) + + r = np.zeros((N, N)) + for i in range(N): + for j in range(N): + r[i, j] = np.exp(-0.1 * np.fabs(i - j)) + + errl = np.sqrt([3.4, 2.5, 3.6, 2.8, 4.2, 4.7, 4.9, 5.1, 3.2, 4.2]) + errl *= 4 + for i in range(N): + for j in range(N): + r[i, j] *= errl[i] * errl[j] + + c = cholesky(r, lower=True) + y = np.dot(c, x) + x = np.arange(N) + for linear in [True, False]: + data = [] + for i in range(N): + if linear: + data.append(pe.Obs([[i + 1 + o for o in y[i]]], ['ens'])) + else: + data.append(pe.Obs([[np.exp(-(i + 1)) + np.exp(-(i + 1)) * o for o in y[i]]], ['ens'])) + + [o.gamma_method() for o in data] + + if linear: + def fitf(p, x): + return p[1] + p[0] * x + else: + def fitf(p, x): + return p[1] * np.exp(-p[0] * x) + + fitp = pe.least_squares(x, data, fitf, expected_chisquare=True) + + fitpc = pe.least_squares(x, data, fitf, correlated_fit=True) + for i in range(2): + diff = fitp[i] - fitpc[i] + diff.gamma_method() + assert(diff.is_zero_within_error(sigma=1.5)) + def test_total_least_squares(): dim = 10 + int(30 * np.random.rand()) @@ -70,16 +158,62 @@ def test_total_least_squares(): odr.set_job(fit_type=0, deriv=1) output = odr.run() - out = pe.total_least_squares(ox, oy, func) + out = pe.total_least_squares(ox, oy, func, expected_chisquare=True) beta = out.fit_parameters - pe.Obs.e_tag_global = 5 + str(out) + repr(out) + len(out) + for i in range(2): - beta[i].gamma_method(e_tag=5, S=1.0) + beta[i].gamma_method(S=1.0) assert math.isclose(beta[i].value, output.beta[i], rel_tol=1e-5) assert math.isclose(output.cov_beta[i, i], beta[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(beta[i].dvalue ** 2) assert math.isclose(pe.covariance(beta[0], beta[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) - pe.Obs.e_tag_global = 0 + + out = pe.total_least_squares(ox, oy, func, const_par=[beta[1]]) + + diff = out.fit_parameters[0] - beta[0] + assert(diff / beta[0] < 1e-3 * beta[0].dvalue) + assert((out.fit_parameters[1] - beta[1]).is_zero()) + + oxc = [] + for i, item in enumerate(x): + oxc.append(pe.cov_Obs(x[i], xerr[i]**2, 'covx' + str(i))) + + oyc = [] + for i, item in enumerate(x): + oyc.append(pe.cov_Obs(y[i], yerr[i]**2, 'covy' + str(i))) + + outc = pe.total_least_squares(oxc, oyc, func) + betac = outc.fit_parameters + + for i in range(2): + betac[i].gamma_method(S=1.0) + assert math.isclose(betac[i].value, 
output.beta[i], rel_tol=1e-3) + assert math.isclose(output.cov_beta[i, i], betac[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(betac[i].dvalue ** 2) + assert math.isclose(pe.covariance(betac[0], betac[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) + + outc = pe.total_least_squares(oxc, oyc, func, const_par=[betac[1]]) + + diffc = outc.fit_parameters[0] - betac[0] + assert(diffc / betac[0] < 1e-3 * betac[0].dvalue) + assert((outc.fit_parameters[1] - betac[1]).is_zero()) + + outc = pe.total_least_squares(oxc, oy, func) + betac = outc.fit_parameters + + for i in range(2): + betac[i].gamma_method(S=1.0) + assert math.isclose(betac[i].value, output.beta[i], rel_tol=1e-3) + assert math.isclose(output.cov_beta[i, i], betac[i].dvalue ** 2, rel_tol=2.5e-1), str(output.cov_beta[i, i]) + ' ' + str(betac[i].dvalue ** 2) + assert math.isclose(pe.covariance(betac[0], betac[1]), output.cov_beta[0, 1], rel_tol=2.5e-1) + + outc = pe.total_least_squares(oxc, oy, func, const_par=[betac[1]]) + + diffc = outc.fit_parameters[0] - betac[0] + assert(diffc / betac[0] < 1e-3 * betac[0].dvalue) + assert((outc.fit_parameters[1] - betac[1]).is_zero()) def test_odr_derivatives(): @@ -99,7 +233,177 @@ def test_odr_derivatives(): out = pe.total_least_squares(x, y, func) fit1 = out.fit_parameters - with pytest.warns(DeprecationWarning): - tfit = pe.fits.fit_general(x, y, func, base_step=0.1, step_ratio=1.1, num_steps=20) + tfit = fit_general(x, y, func, base_step=0.1, step_ratio=1.1, num_steps=20) assert np.abs(np.max(np.array(list(fit1[1].deltas.values())) - np.array(list(tfit[1].deltas.values())))) < 10e-8 + + +def test_r_value_persistence(): + def f(a, x): + return a[0] + a[1] * x + + a = pe.pseudo_Obs(1.1, .1, 'a') + assert np.isclose(a.value, a.r_values['a']) + + a_2 = a ** 2 + assert np.isclose(a_2.value, a_2.r_values['a']) + + b = pe.pseudo_Obs(2.1, .2, 'b') + + y = [a, b] + [o.gamma_method() for o in y] + + fitp = pe.fits.least_squares([1, 2], y, f) + + assert np.isclose(fitp[0].value, fitp[0].r_values['a']) + assert np.isclose(fitp[0].value, fitp[0].r_values['b']) + assert np.isclose(fitp[1].value, fitp[1].r_values['a']) + assert np.isclose(fitp[1].value, fitp[1].r_values['b']) + + fitp = pe.fits.total_least_squares(y, y, f) + + assert np.isclose(fitp[0].value, fitp[0].r_values['a']) + assert np.isclose(fitp[0].value, fitp[0].r_values['b']) + assert np.isclose(fitp[1].value, fitp[1].r_values['a']) + assert np.isclose(fitp[1].value, fitp[1].r_values['b']) + + fitp = pe.fits.least_squares([1, 2], y, f, priors=y) + + assert np.isclose(fitp[0].value, fitp[0].r_values['a']) + assert np.isclose(fitp[0].value, fitp[0].r_values['b']) + assert np.isclose(fitp[1].value, fitp[1].r_values['a']) + assert np.isclose(fitp[1].value, fitp[1].r_values['b']) + + +def test_prior_fit(): + def f(a, x): + return a[0] + a[1] * x + + a = pe.pseudo_Obs(0.0, 0.1, 'a') + b = pe.pseudo_Obs(1.0, 0.2, 'a') + + y = [a, b] + with pytest.raises(Exception): + fitp = pe.fits.least_squares([0, 1], 1 * np.array(y), f, priors=['0.0(8)', '1.0(8)']) + + [o.gamma_method() for o in y] + + fitp = pe.fits.least_squares([0, 1], y, f, priors=['0.0(8)', '1.0(8)']) + fitp = pe.fits.least_squares([0, 1], y, f, priors=y, resplot=True, qqplot=True) + + +def test_error_band(): + def f(a, x): + return a[0] + a[1] * x + + a = pe.pseudo_Obs(0.0, 0.1, 'a') + b = pe.pseudo_Obs(1.0, 0.2, 'a') + + x = [0, 1] + y = [a, b] + + fitp = pe.fits.least_squares(x, y, f) + + with pytest.raises(Exception): + pe.fits.error_band(x, f, fitp.fit_parameters) + 
fitp.gamma_method() + pe.fits.error_band(x, f, fitp.fit_parameters) + + +def fit_general(x, y, func, silent=False, **kwargs): + """Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters. + + Plausibility of the results should be checked. To control the numerical differentiation + the kwargs of numdifftools.step_generators.MaxStepGenerator can be used. + + func has to be of the form + + def func(a, x): + y = a[0] + a[1] * x + a[2] * np.sinh(x) + return y + + y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit. + x can either be a list of floats in which case no xerror is assumed, or + a list of Obs, where the dvalues of the Obs are used as xerror for the fit. + + Keyword arguments + ----------------- + silent -- If true all output to the console is omitted (default False). + initial_guess -- can provide an initial guess for the input parameters. Relevant for non-linear fits + with many parameters. + """ + + if not callable(func): + raise TypeError('func has to be a function.') + + for i in range(10): + try: + func(np.arange(i), 0) + except: + pass + else: + break + n_parms = i + if not silent: + print('Fit with', n_parms, 'parameters') + + global print_output, beta0 + print_output = 1 + if 'initial_guess' in kwargs: + beta0 = kwargs.get('initial_guess') + if len(beta0) != n_parms: + raise Exception('Initial guess does not have the correct length.') + else: + beta0 = np.arange(n_parms) + + if len(x) != len(y): + raise Exception('x and y have to have the same length') + + if all(isinstance(n, pe.Obs) for n in x): + obs = x + y + x_constants = None + xerr = [o.dvalue for o in x] + yerr = [o.dvalue for o in y] + elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray): + obs = y + x_constants = x + xerr = None + yerr = [o.dvalue for o in y] + else: + raise Exception('Unsupported types for x') + + def do_the_fit(obs, **kwargs): + + global print_output, beta0 + + func = kwargs.get('function') + yerr = kwargs.get('yerr') + length = len(yerr) + + xerr = kwargs.get('xerr') + + if length == len(obs): + assert 'x_constants' in kwargs + data = RealData(kwargs.get('x_constants'), obs, sy=yerr) + fit_type = 2 + elif length == len(obs) // 2: + data = RealData(obs[:length], obs[length:], sx=xerr, sy=yerr) + fit_type = 0 + else: + raise Exception('x and y do not fit together.') + + model = Model(func) + + odr = ODR(data, model, beta0, partol=np.finfo(np.float64).eps) + odr.set_job(fit_type=fit_type, deriv=1) + output = odr.run() + if print_output and not silent: + print(*output.stopreason) + print('chisquare/d.o.f.:', output.res_var) + print_output = 0 + beta0 = output.beta + return output.beta[kwargs.get('n')] + res = [] + for n in range(n_parms): + res.append(pe.derived_observable(do_the_fit, obs, function=func, xerr=xerr, yerr=yerr, x_constants=x_constants, num_grad=True, n=n, **kwargs)) + return res diff --git a/tests/io_test.py b/tests/io_test.py new file mode 100644 index 00000000..92781785 --- /dev/null +++ b/tests/io_test.py @@ -0,0 +1,91 @@ +import os +import gzip +import numpy as np +import pyerrors as pe +import pyerrors.input.json as jsonio + + +def test_jsonio(): + o = pe.pseudo_Obs(1.0, .2, 'one') + o2 = pe.pseudo_Obs(0.5, .1, 'two|r1') + o3 = pe.pseudo_Obs(0.5, .1, 'two|r2') + o4 = pe.merge_obs([o2, o3]) + otag = 'This has been merged!' 
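+    # tag the merged Obs so that serialisation and deserialisation of
+    # string tags is exercised by the assertions further below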
+ o4.tag = otag + do = o - .2 * o4 + co1 = pe.cov_Obs(1., .123, 'cov1') + co3 = pe.cov_Obs(4., .1 ** 2, 'cov3') + do *= co1 / co3 + do.tag = {'A': 2} + + o5 = pe.pseudo_Obs(0.8, .1, 'two|r2') + co2 = pe.cov_Obs([1, 2], [[.12, .004], [.004, .02]], 'cov2') + o5 /= co2[0] + o3 /= co2[1] + o5.tag = 2 * otag + testl = [o3, o5] + + arr = np.array([o3, o5]) + mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]]) + mat[0][1].tag = ['This', 'is', 2, None] + mat[1][0].tag = '{testt}' + mat[1][1].tag = '[tag]' + + tt1 = pe.Obs([np.random.rand(100)], ['t|r1'], idl=[range(2, 202, 2)]) + tt2 = pe.Obs([np.random.rand(100)], ['t|r2'], idl=[range(2, 202, 2)]) + tt3 = pe.Obs([np.random.rand(102)], ['qe']) + + tt = tt1 + tt2 + tt3 + + tt.tag = 'Test Obs: Ä' + + ol = [o4, do, testl, mat, arr, np.array([o]), np.array([tt, tt]), [tt, tt], co1, co2, np.array(co2), co1 / co2[0]] + fname = 'test_rw' + + jsonio.dump_to_json(ol, fname, indent=1, description='[I am a tricky description]') + + rl = jsonio.load_json(fname) + + os.remove(fname + '.json.gz') + + for o, r in zip(ol, rl): + assert np.all(o == r) + + for i in range(len(ol)): + if isinstance(ol[i], pe.Obs): + o = ol[i] - rl[i] + assert(o.is_zero()) + assert(ol[i].tag == rl[i].tag) + or1 = np.ravel(ol[i]) + or2 = np.ravel(rl[i]) + for j in range(len(or1)): + o = or1[j] - or2[j] + assert(o.is_zero()) + + description = {'I': {'Am': {'a': 'nested dictionary!'}}} + jsonio.dump_to_json(ol, fname, indent=0, gz=False, description=description) + + rl = jsonio.load_json(fname, gz=False, full_output=True) + + os.remove(fname + '.json') + + for o, r in zip(ol, rl['obsdata']): + assert np.all(o == r) + + assert(description == rl['description']) + + +def test_json_string_reconstruction(): + my_obs = pe.Obs([np.random.rand(100)], ['name']) + + json_string = pe.input.json.create_json_string(my_obs) + reconstructed_obs1 = pe.input.json.import_json_string(json_string) + assert my_obs == reconstructed_obs1 + + compressed_string = gzip.compress(json_string.encode('utf-8')) + + reconstructed_string = gzip.decompress(compressed_string).decode('utf-8') + reconstructed_obs2 = pe.input.json.import_json_string(reconstructed_string) + + assert reconstructed_string == json_string + assert my_obs == reconstructed_obs2 diff --git a/tests/linalg_test.py b/tests/linalg_test.py index 2b6f200c..f446d972 100644 --- a/tests/linalg_test.py +++ b/tests/linalg_test.py @@ -7,35 +7,153 @@ import pytest np.random.seed(0) -def test_matmul(): - for dim in [4, 8]: - my_list = [] - length = 1000 + np.random.randint(200) - for i in range(dim ** 2): - my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])) - my_array = np.array(my_list).reshape((dim, dim)) - tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array - for t, e in np.ndenumerate(tt): - assert e.is_zero(), t +def get_real_matrix(dimension): + base_matrix = np.empty((dimension, dimension), dtype=object) + for (n, m), entry in np.ndenumerate(base_matrix): + exponent_real = np.random.normal(0, 1) + exponent_imag = np.random.normal(0, 1) + base_matrix[n, m] = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t']) - my_list = [] - length = 1000 + np.random.randint(200) - for i in range(dim ** 2): - my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']), - pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))) - my_array = np.array(my_list).reshape((dim, dim)) 
- tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array - for t, e in np.ndenumerate(tt): - assert e.is_zero(), t + + return base_matrix + +def get_complex_matrix(dimension): + base_matrix = np.empty((dimension, dimension), dtype=object) + for (n, m), entry in np.ndenumerate(base_matrix): + exponent_real = np.random.normal(0, 1) + exponent_imag = np.random.normal(0, 1) + base_matrix[n, m] = pe.CObs(pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t']), + pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t'])) + + return base_matrix + + +def test_matmul(): + for dim in [4, 6]: + for const in [1, pe.cov_Obs([1.0, 1.0], [[0.001,0.0001], [0.0001, 0.002]], 'norm')[1]]: + my_list = [] + length = 100 + np.random.randint(200) + for i in range(dim ** 2): + my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])) + my_array = const * np.array(my_list).reshape((dim, dim)) + tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array + for t, e in np.ndenumerate(tt): + assert e.is_zero(), t + + my_list = [] + length = 100 + np.random.randint(200) + for i in range(dim ** 2): + my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']), + pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))) + my_array = np.array(my_list).reshape((dim, dim)) * const + tt = pe.linalg.matmul(my_array, my_array) - my_array @ my_array + for t, e in np.ndenumerate(tt): + assert e.is_zero(), t + + +def test_jack_matmul(): + tt = get_real_matrix(8) + check1 = pe.linalg.jack_matmul(tt, tt) - pe.linalg.matmul(tt, tt) + [o.gamma_method() for o in check1.ravel()] + assert np.all([o.is_zero_within_error(0.1) for o in check1.ravel()]) + assert np.all([o.dvalue < 0.001 for o in check1.ravel()]) + trace1 = np.trace(check1) + trace1.gamma_method() + assert trace1.dvalue < 0.001 + + tr = np.random.rand(8, 8) + check2 = pe.linalg.jack_matmul(tt, tr) - pe.linalg.matmul(tt, tr) + [o.gamma_method() for o in check2.ravel()] + assert np.all([o.is_zero_within_error(0.1) for o in check2.ravel()]) + assert np.all([o.dvalue < 0.001 for o in check2.ravel()]) + trace2 = np.trace(check2) + trace2.gamma_method() + assert trace2.dvalue < 0.001 + + tt2 = get_complex_matrix(8) + check3 = pe.linalg.jack_matmul(tt2, tt2) - pe.linalg.matmul(tt2, tt2) + [o.gamma_method() for o in check3.ravel()] + assert np.all([o.real.is_zero_within_error(0.1) for o in check3.ravel()]) + assert np.all([o.imag.is_zero_within_error(0.1) for o in check3.ravel()]) + assert np.all([o.real.dvalue < 0.001 for o in check3.ravel()]) + assert np.all([o.imag.dvalue < 0.001 for o in check3.ravel()]) + trace3 = np.trace(check3) + trace3.gamma_method() + assert trace3.real.dvalue < 0.001 + assert trace3.imag.dvalue < 0.001 + + tr2 = np.random.rand(8, 8) + 1j * np.random.rand(8, 8) + check4 = pe.linalg.jack_matmul(tt2, tr2) - pe.linalg.matmul(tt2, tr2) + [o.gamma_method() for o in check4.ravel()] + assert np.all([o.real.is_zero_within_error(0.1) for o in check4.ravel()]) + assert np.all([o.imag.is_zero_within_error(0.1) for o in check4.ravel()]) + assert np.all([o.real.dvalue < 0.001 for o in check4.ravel()]) + assert np.all([o.imag.dvalue < 0.001 for o in check4.ravel()]) + trace4 = np.trace(check4) + trace4.gamma_method() + assert trace4.real.dvalue < 0.001 + assert trace4.imag.dvalue < 0.001 + + +def test_einsum(): + + def _perform_real_check(arr): + [o.gamma_method() for o in arr] + assert np.all([o.is_zero_within_error(0.001) for o in arr]) + assert np.all([o.dvalue < 0.001 for o in 
arr]) + + def _perform_complex_check(arr): + [o.gamma_method() for o in arr] + assert np.all([o.real.is_zero_within_error(0.001) for o in arr]) + assert np.all([o.real.dvalue < 0.001 for o in arr]) + assert np.all([o.imag.is_zero_within_error(0.001) for o in arr]) + assert np.all([o.imag.dvalue < 0.001 for o in arr]) + + + tt = [get_real_matrix(4), get_real_matrix(3)] + q = np.tensordot(tt[0], tt[1], 0) + c1 = tt[1] @ q + c2 = pe.linalg.einsum('ij,abjd->abid', tt[1], q) + check1 = c1 - c2 + _perform_real_check(check1.ravel()) + check2 = np.trace(tt[0]) - pe.linalg.einsum('ii', tt[0]) + _perform_real_check([check2]) + check3 = np.trace(tt[1]) - pe.linalg.einsum('ii', tt[1]) + _perform_real_check([check3]) + + tt = [get_real_matrix(4), np.random.random((3, 3))] + q = np.tensordot(tt[0], tt[1], 0) + c1 = tt[1] @ q + c2 = pe.linalg.einsum('ij,abjd->abid', tt[1], q) + check1 = c1 - c2 + _perform_real_check(check1.ravel()) + + tt = [get_complex_matrix(4), get_complex_matrix(3)] + q = np.tensordot(tt[0], tt[1], 0) + c1 = tt[1] @ q + c2 = pe.linalg.einsum('ij,abjd->abid', tt[1], q) + check1 = c1 - c2 + _perform_complex_check(check1.ravel()) + check2 = np.trace(tt[0]) - pe.linalg.einsum('ii', tt[0]) + _perform_complex_check([check2]) + check3 = np.trace(tt[1]) - pe.linalg.einsum('ii', tt[1]) + _perform_complex_check([check3]) + + tt = [get_complex_matrix(4), np.random.random((3, 3))] + q = np.tensordot(tt[0], tt[1], 0) + c1 = tt[1] @ q + c2 = pe.linalg.einsum('ij,abjd->abid', tt[1], q) + check1 = c1 - c2 + _perform_complex_check(check1.ravel()) def test_multi_dot(): - for dim in [4, 8]: + for dim in [4, 6]: my_list = [] length = 1000 + np.random.randint(200) for i in range(dim ** 2): my_list.append(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2'])) - my_array = np.array(my_list).reshape((dim, dim)) + my_array = pe.cov_Obs(1.0, 0.002, 'cov') * np.array(my_list).reshape((dim, dim)) tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array for t, e in np.ndenumerate(tt): assert e.is_zero(), t @@ -45,12 +163,25 @@ def test_multi_dot(): for i in range(dim ** 2): my_list.append(pe.CObs(pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']), pe.Obs([np.random.rand(length), np.random.rand(length + 1)], ['t1', 't2']))) - my_array = np.array(my_list).reshape((dim, dim)) + my_array = np.array(my_list).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') tt = pe.linalg.matmul(my_array, my_array, my_array, my_array) - my_array @ my_array @ my_array @ my_array for t, e in np.ndenumerate(tt): assert e.is_zero(), t +def test_jack_multi_dot(): + for dim in [2, 4, 8]: + my_array = get_real_matrix(dim) + + tt = pe.linalg.jack_matmul(my_array, my_array, my_array) - pe.linalg.matmul(my_array, my_array, my_array) + + for t, e in np.ndenumerate(tt): + e.gamma_method() + assert e.is_zero_within_error(0.01) + assert e.is_zero(atol=1e-1), t + assert np.isclose(e.value, 0.0) + + def test_matmul_irregular_histories(): dim = 2 length = 500 @@ -58,13 +189,13 @@ def test_matmul_irregular_histories(): standard_array = [] for i in range(dim ** 2): standard_array.append(pe.Obs([np.random.normal(1.1, 0.2, length)], ['ens1'])) - standard_matrix = np.array(standard_array).reshape((dim, dim)) + standard_matrix = np.array(standard_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(0.1, 0.002, 'qr') for idl in [range(1, 501, 2), range(250, 273), [2, 8, 19, 20, 78]]: irregular_array = [] for i in range(dim ** 2): 
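+        # each matrix entry is an Obs with pseudo Monte Carlo histories on
+        # the two ensembles 't1' and 't2' (deliberately of different length)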
irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl))], ['ens1'], idl=[idl])) - irregular_matrix = np.array(irregular_array).reshape((dim, dim)) + irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs([1.0, 1.0], [[0.001,0.0001], [0.0001, 0.002]], 'norm')[0] t1 = standard_matrix @ irregular_matrix t2 = pe.linalg.matmul(standard_matrix, irregular_matrix) @@ -82,7 +213,7 @@ def test_irregular_matrix_inverse(): irregular_array = [] for i in range(dim ** 2): irregular_array.append(pe.Obs([np.random.normal(1.1, 0.2, len(idl)), np.random.normal(0.25, 0.1, 10)], ['ens1', 'ens2'], idl=[idl, range(1, 11)])) - irregular_matrix = np.array(irregular_array).reshape((dim, dim)) + irregular_matrix = np.array(irregular_array).reshape((dim, dim)) * pe.cov_Obs(1.0, 0.002, 'cov') * pe.pseudo_Obs(1.0, 0.002, 'ens2|r23') invertible_irregular_matrix = np.identity(dim) + irregular_matrix @ irregular_matrix.T @@ -113,8 +244,8 @@ def test_complex_matrix_inverse(): base_matrix = np.empty((dimension, dimension), dtype=object) matrix = np.empty((dimension, dimension), dtype=complex) for (n, m), entry in np.ndenumerate(base_matrix): - exponent_real = np.random.normal(3, 5) - exponent_imag = np.random.normal(3, 5) + exponent_real = np.random.normal(2, 3) + exponent_imag = np.random.normal(2, 3) base_matrix[n, m] = pe.CObs(pe.pseudo_Obs(2 + 10 ** exponent_real, 10 ** (exponent_real - 1), 't'), pe.pseudo_Obs(2 + 10 ** exponent_imag, 10 ** (exponent_imag - 1), 't')) @@ -169,6 +300,10 @@ def test_matrix_functions(): for j in range(dim): assert tmp[j].is_zero() + # Check eig function + e2 = pe.linalg.eig(sym) + assert np.all(np.sort(e) == np.sort(e2)) + # Check svd u, v, vh = pe.linalg.svd(sym) diff = sym - u @ np.diag(v) @ vh @@ -176,6 +311,9 @@ def test_matrix_functions(): for (i, j), entry in np.ndenumerate(diff): assert entry.is_zero() + # Check determinant + assert pe.linalg.det(np.diag(np.diag(matrix))) == np.prod(np.diag(matrix)) + def test_complex_matrix_operations(): dimension = 4 diff --git a/tests/obs_test.py b/tests/obs_test.py index f0adcc49..989a0064 100644 --- a/tests/obs_test.py +++ b/tests/obs_test.py @@ -9,6 +9,50 @@ import pytest np.random.seed(0) +def test_Obs_exceptions(): + with pytest.raises(Exception): + pe.Obs([np.random.rand(10)], ['1', '2']) + with pytest.raises(Exception): + pe.Obs([np.random.rand(10)], ['1'], idl=[]) + with pytest.raises(Exception): + pe.Obs([np.random.rand(10), np.random.rand(10)], ['1', '1']) + with pytest.raises(Exception): + pe.Obs([np.random.rand(10), np.random.rand(10)], ['1', 1]) + with pytest.raises(Exception): + pe.Obs([np.random.rand(10)], [1]) + with pytest.raises(Exception): + pe.Obs([np.random.rand(4)], ['name']) + with pytest.raises(Exception): + pe.Obs([np.random.rand(5)], ['1'], idl=[[5, 3, 2 ,4 ,1]]) + with pytest.raises(Exception): + pe.Obs([np.random.rand(5)], ['1'], idl=['t']) + with pytest.raises(Exception): + pe.Obs([np.random.rand(5)], ['1'], idl=[range(1, 8)]) + + my_obs = pe.Obs([np.random.rand(6)], ['name']) + my_obs._value = 0.0 + my_obs.details() + with pytest.raises(Exception): + my_obs.plot_tauint() + with pytest.raises(Exception): + my_obs.plot_rho() + with pytest.raises(Exception): + my_obs.plot_rep_dist() + with pytest.raises(Exception): + my_obs.plot_piechart() + with pytest.raises(Exception): + my_obs.gamma_method(S='2.3') + with pytest.raises(Exception): + my_obs.gamma_method(tau_exp=2.3) + my_obs.gamma_method() + my_obs.details() + my_obs.plot_rep_dist() + + my_obs += pe.Obs([np.random.rand(6)], 
['name2|r1'], idl=[[1, 3, 4, 5, 6, 7]]) + my_obs += pe.Obs([np.random.rand(6)], ['name2|r2']) + my_obs.gamma_method() + my_obs.details() + def test_dump(): value = np.random.normal(5, 10) dvalue = np.abs(np.random.normal(0, 1)) @@ -95,6 +139,18 @@ def test_gamma_method(): assert test_obs.e_tauint['t'] - 10.5 <= test_obs.e_dtauint['t'] +def test_gamma_method_no_windowing(): + for iteration in range(50): + obs = pe.Obs([np.random.normal(1.02, 0.02, 733 + np.random.randint(1000))], ['ens']) + obs.gamma_method(S=0) + assert obs.e_tauint['ens'] == 0.5 + assert np.isclose(np.sqrt(np.var(obs.deltas['ens'], ddof=1) / obs.shape['ens']), obs.dvalue) + obs.gamma_method(S=1.1) + assert obs.e_tauint['ens'] > 0.5 + with pytest.raises(Exception): + obs.gamma_method(S=-0.2) + + def test_gamma_method_persistance(): my_obs = pe.Obs([np.random.rand(730)], ['t']) my_obs.gamma_method() @@ -183,7 +239,7 @@ def test_covariance_is_variance(): test_obs.gamma_method() assert np.abs(test_obs.dvalue ** 2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps test_obs = test_obs + pe.pseudo_Obs(value, dvalue, 'q', 200) - test_obs.gamma_method(e_tag=0) + test_obs.gamma_method() assert np.abs(test_obs.dvalue ** 2 - pe.covariance(test_obs, test_obs)) <= 10 * np.finfo(np.float64).eps @@ -221,7 +277,7 @@ def test_gamma_method(): test_obs = pe.pseudo_Obs(value, dvalue, 't', int(1000 * (1 + np.random.rand()))) # Test if the error is processed correctly - test_obs.gamma_method(e_tag=1) + test_obs.gamma_method() assert np.abs(test_obs.value - value) < 1e-12 assert abs(test_obs.dvalue - dvalue) < 1e-10 * dvalue @@ -241,7 +297,7 @@ def test_derived_observables(): assert np.abs(d_Obs_ad.dvalue-d_Obs_fd.dvalue) < 1000 * np.finfo(np.float64).eps * d_Obs_ad.dvalue i_am_one = pe.derived_observable(lambda x, **kwargs: x[0] / x[1], [d_Obs_ad, d_Obs_ad]) - i_am_one.gamma_method(e_tag=1) + i_am_one.gamma_method() assert i_am_one.value == 1.0 assert i_am_one.dvalue < 2 * np.finfo(np.float64).eps @@ -290,15 +346,17 @@ def test_overloaded_functions(): for i, item in enumerate(funcs): ad_obs = item(test_obs) fd_obs = pe.derived_observable(lambda x, **kwargs: item(x[0]), [test_obs], num_grad=True) - ad_obs.gamma_method(S=0.01, e_tag=1) + ad_obs.gamma_method(S=0.01) assert np.max((ad_obs.deltas['t'] - fd_obs.deltas['t']) / ad_obs.deltas['t']) < 1e-8, item.__name__ assert np.abs((ad_obs.value - item(val)) / ad_obs.value) < 1e-10, item.__name__ assert np.abs(ad_obs.dvalue - dval * np.abs(deriv[i](val))) < 1e-6, item.__name__ def test_utils(): + zero_pseudo_obs = pe.pseudo_Obs(1.0, 0.0, 'null') my_obs = pe.pseudo_Obs(1.0, 0.5, 't|r01') my_obs += pe.pseudo_Obs(1.0, 0.5, 't|r02') + str(my_obs) for tau_exp in [0, 5]: my_obs.gamma_method(tau_exp=tau_exp) my_obs.tag = "Test description" @@ -313,6 +371,8 @@ def test_utils(): my_obs.plot_piechart() assert my_obs > (my_obs - 1) assert my_obs < (my_obs + 1) + float(my_obs) + str(my_obs) def test_cobs(): @@ -360,6 +420,15 @@ def test_reweighting(): r_obs2 = r_obs[0] * my_obs assert r_obs2.reweighted + my_irregular_obs = pe.Obs([np.random.rand(500)], ['t'], idl=[range(1, 1001, 2)]) + assert not my_irregular_obs.reweighted + r_obs = pe.reweight(my_obs, [my_irregular_obs], all_configs=True) + r_obs = pe.reweight(my_obs, [my_irregular_obs], all_configs=False) + r_obs = pe.reweight(my_obs, [my_obs]) + assert r_obs[0].reweighted + r_obs2 = r_obs[0] * my_obs + assert r_obs2.reweighted + def test_merge_obs(): my_obs1 = pe.Obs([np.random.rand(100)], ['t']) @@ -369,6 +438,16 @@ def test_merge_obs(): 
assert diff == -(my_obs1.value + my_obs2.value) / 2 +def test_merge_obs_r_values(): + a1 = pe.pseudo_Obs(1.1, .1, 'a|1') + a2 = pe.pseudo_Obs(1.2, .1, 'a|2') + a = pe.merge_obs([a1, a2]) + + assert np.isclose(a.r_values['a|1'], a1.value) + assert np.isclose(a.r_values['a|2'], a2.value) + assert np.isclose(a.value, np.mean([a1.value, a2.value])) + + def test_correlate(): my_obs1 = pe.Obs([np.random.rand(100)], ['t']) my_obs2 = pe.Obs([np.random.rand(100)], ['t']) @@ -397,6 +476,7 @@ def test_irregular_error_propagation(): pe.Obs([np.random.rand(6)], ['t'], idl=[[4, 18, 27, 29, 57, 80]]), pe.Obs([np.random.rand(50)], ['t'], idl=[list(range(1, 26)) + list(range(50, 100, 2))])] for obs1 in obs_list: + obs1.details() for obs2 in obs_list: assert obs1 == (obs1 / obs2) * obs2 assert obs1 == (obs1 * obs2) / obs2 @@ -447,8 +527,8 @@ def test_gamma_method_irregular(): idx2 = [i + 1 for i in range(len(configs)) if configs[i] == 1] a = pe.Obs([zero_arr, zero_arr2], ['a1', 'a2'], idl=[idx, idx2]) - afull.gamma_method(e_tag=1) - a.gamma_method(e_tag=1) + afull.gamma_method() + a.gamma_method() expe = (afull.dvalue * np.sqrt(N / np.sum(configs))) assert (a.dvalue - 5 * a.ddvalue < expe and expe < a.dvalue + 5 * a.ddvalue) @@ -462,20 +542,20 @@ def test_gamma_method_irregular(): arr = np.random.normal(1, .2, size=N) carr = gen_autocorrelated_array(arr, .346) a = pe.Obs([carr], ['a']) - a.gamma_method(e_tag=1) + a.gamma_method() ae = pe.Obs([[carr[i] for i in range(len(carr)) if i % 2 == 0]], ['a'], idl=[[i for i in range(len(carr)) if i % 2 == 0]]) - ae.gamma_method(e_tag=1) + ae.gamma_method() ao = pe.Obs([[carr[i] for i in range(len(carr)) if i % 2 == 1]], ['a'], idl=[[i for i in range(len(carr)) if i % 2 == 1]]) - ao.gamma_method(e_tag=1) + ao.gamma_method() assert(ae.e_tauint['a'] < a.e_tauint['a']) assert((ae.e_tauint['a'] - 4 * ae.e_dtauint['a'] < ao.e_tauint['a'])) assert((ae.e_tauint['a'] + 4 * ae.e_dtauint['a'] > ao.e_tauint['a'])) -def test_covariance2_symmetry(): +def test_covariance_symmetry(): value1 = np.random.normal(5, 10) dvalue1 = np.abs(np.random.normal(0, 1)) test_obs1 = pe.pseudo_Obs(value1, dvalue1, 't') @@ -484,8 +564,8 @@ def test_covariance2_symmetry(): dvalue2 = np.abs(np.random.normal(0, 1)) test_obs2 = pe.pseudo_Obs(value2, dvalue2, 't') test_obs2.gamma_method() - cov_ab = pe.covariance2(test_obs1, test_obs2) - cov_ba = pe.covariance2(test_obs2, test_obs1) + cov_ab = pe.covariance(test_obs1, test_obs2) + cov_ba = pe.covariance(test_obs2, test_obs1) assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) @@ -498,14 +578,20 @@ def test_covariance2_symmetry(): idx = [i + 1 for i in range(len(configs)) if configs[i] == 1] a = pe.Obs([zero_arr], ['t'], idl=[idx]) a.gamma_method() - assert np.isclose(a.dvalue**2, pe.covariance2(a, a), atol=100, rtol=1e-4) + assert np.isclose(a.dvalue**2, pe.covariance(a, a), atol=100, rtol=1e-4) - cov_ab = pe.covariance2(test_obs1, a) - cov_ba = pe.covariance2(a, test_obs1) + cov_ab = pe.covariance(test_obs1, a) + cov_ba = pe.covariance(a, test_obs1) assert np.abs(cov_ab - cov_ba) <= 10 * np.finfo(np.float64).eps assert np.abs(cov_ab) < test_obs1.dvalue * test_obs2.dvalue * (1 + 10 * np.finfo(np.float64).eps) +def test_empty_obs(): + o = pe.Obs([np.random.rand(100)], ['test']) + q = o + pe.Obs([], []) + assert q == o + + def test_jackknife(): full_data = np.random.normal(1.1, 0.87, 5487) @@ -522,3 +608,11 @@ def test_jackknife(): my_new_obs = 
my_obs + pe.Obs([full_data], ['test2']) with pytest.raises(Exception): my_new_obs.export_jackknife() + + +def test_import_jackknife(): + full_data = np.random.normal(1.105, 0.021, 754) + my_obs = pe.Obs([full_data], ['test']) + my_jacks = my_obs.export_jackknife() + reconstructed_obs = pe.import_jackknife(my_jacks, 'test') + assert my_obs == reconstructed_obs diff --git a/tests/roots_test.py b/tests/roots_test.py index d7c4ed1f..dbb27bbe 100644 --- a/tests/roots_test.py +++ b/tests/roots_test.py @@ -15,5 +15,18 @@ def test_root_linear(): my_root = pe.roots.find_root(my_obs, root_function) assert np.isclose(my_root.value, value) + assert np.isclose(my_root.value, my_root.r_values['t']) + difference = my_obs - my_root + assert difference.is_zero() + + +def test_root_linear_idl(): + + def root_function(x, d): + return x - d + + my_obs = pe.Obs([np.random.rand(50)], ['t'], idl=[range(20, 120, 2)]) + my_root = pe.roots.find_root(my_obs, root_function) + difference = my_obs - my_root assert difference.is_zero()