Merge branch 'develop' into feature/dobs

This commit is contained in:
Simon Kuberski 2022-05-02 14:21:25 +02:00
commit 44b268fcf9
10 changed files with 218 additions and 124 deletions

View file

@ -2,53 +2,54 @@
All notable changes to this project will be documented in this file.
## [2.0.0] - 2022-??-??
## [2.0.0] - 2022-03-31
### Added
- The possibility to work with Monte Carlo histories which are evenly or unevenly spaced was added.
- `cov_Obs` added as a possibility to propagate the error of non Monte Carlo data together with Monte Carlo data.
- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation.
- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int`.
- Support for a new `json.gz` file format was added
- Support for a new `json.gz` file format was added.
- The Corr class now has additional methods like `reverse`, `T_symmetry`, `correlate` and `reweight`.
- `Corr.m_eff` can now cope with periodic and anti-periodic correlation functions
- Forward, backward and improved variants of the first and second derivative were added to the `Corr` class
- The `linalg` module now has explicit functions `inv` and `cholesky`.
- `Corr.m_eff` can now cope with periodic and anti-periodic correlation functions.
- Forward, backward and improved variants of the first and second derivative were added to the `Corr` class.
- `GEVP` functionality of the `Corr` class was reworked and improved.
- The `linalg` module now has explicit functions `inv`, `cholesky` and `det`.
- `Obs` objects now have methods `is_zero` and `is_zero_within_error` as well as overloaded comparison operations.
- Functions to convert Obs data to or from jackknife was added.
- Alternative matrix multiplication routine `jack_matmul` was added to `linalg` module which makes use of the jackknife approximation and is much faster for large matrices.
- Functions to convert `Obs` data to or from jackknife was added.
- Alternative matrix multiplication routines `einsum` and `jack_matmul` were added to `linalg` module which make use of the jackknife approximation and are much faster for large matrices.
- Additional input routines for npr data added to `input.hadrons`.
- The `sfcf` and `openQCD` input modules can now handle all recent file type versions.
- `extract_t0` can now visualize the extraction on the fly
- `extract_t0` can now visualize the extraction on the fly.
- Module added which provides the Dirac gamma matrices in the Grid convention.
- Version number added.
### Changed
- The internal bookkeeping system for ensembles/replica was changed. The separator for replica is now `|`.
- The fit functions were renamed to `least_squares` and `total_least_squares`.
- The output of the fit functions is now a dedicated results class which keeps track of all relevant information
- The output of the fit functions is now a dedicated results class which keeps track of all relevant information.
- The fit functions can now deal with provided covariance matrices.
- `covariance` can now operate on a list or array of `Obs` and returns a matrix
- `covariance` can now operate on a list or array of `Obs` and returns a matrix. The covariance estimate by pyerrors is now always positive semi-definite (within machine precision. Various warnings and exceptions were added for cases in which estimated covariances are close to singular.
- The convention for the fit range in the Corr class has been changed.
- Various method of the `Corr` class were renamed
- Various method of the `Corr` class were renamed.
- `Obs.print` was renamed to `Obs.details` and the output was improved.
- The default value for `Corr.prange` is now `None`.
- The `input` module was restructured to contain one submodule per data source.
- Performance of Obs.__init__ improved.
### Removed
- The function `plot_corrs` was deprecated as all its functionality is now contained within `Corr.show`
- `fits.covariance_matrix` was removed as it is now redundant with the functionality of `covariance`
- The kwarg `bias_correction` in `derived_observable` was removed
- Obs no longer have an attribute `e_Q`
- Removed `fits.fit_exp`
- Removed jackknife module
- The function `plot_corrs` was deprecated as all its functionality is now contained within `Corr.show`.
- `fits.covariance_matrix` was removed as it is now redundant with the functionality of `covariance`.
- The kwarg `bias_correction` in `derived_observable` was removed.
- Obs no longer have an attribute `e_Q`.
- Removed `fits.fit_exp`.
- Removed jackknife module.
## [1.1.0] - 2021-10-11
### Added
- `Corr` class added
- `roots` module added which can find the roots of a function that depends on Monte Carlo data via pyerrors `Obs`
- `input/hadrons` module added which can read hdf5 files written by [Hadrons](https://github.com/aportelli/Hadrons)
- `read_rwms` can now read reweighting factors in the format used by openQCD-2.0
- `read_rwms` can now read reweighting factors in the format used by openQCD-2.0.
## [1.0.1] - 2020-11-03
### Fixed

View file

@ -621,7 +621,7 @@ class Corr:
raise Exception('Unknown variant.')
def fit(self, function, fitrange=None, silent=False, **kwargs):
"""Fits function to the data
r'''Fits function to the data
Parameters
----------
@ -629,10 +629,12 @@ class Corr:
function to fit to the data. See fits.least_squares for details.
fitrange : list
Two element list containing the timeslices on which the fit is supposed to start and stop.
Caution: This range is inclusive as opposed to standard python indexing.
`fitrange=[4, 6]` corresponds to the three entries 4, 5 and 6.
If not specified, self.prange or all timeslices are used.
silent : bool
Decides whether output is printed to the standard output.
"""
'''
if self.N != 1:
raise Exception("Correlator must be projected before fitting")

View file

@ -461,7 +461,7 @@ def _standard_fit(x, y, func, silent=False, **kwargs):
x0 = [0.1] * n_parms
if kwargs.get('correlated_fit') is True:
corr = covariance(y, correlation=True)
corr = covariance(y, correlation=True, **kwargs)
covdiag = np.diag(1 / np.asarray(dy_f))
condn = np.linalg.cond(corr)
if condn > 0.1 / np.finfo(float).eps:
@ -635,9 +635,10 @@ def qqplot(x, o_y, func, p):
def residual_plot(x, y, func, fit_res):
""" Generates a plot which compares the fit to the data and displays the corresponding residuals"""
xstart = x[0] - 0.5
xstop = x[-1] + 0.5
x_samples = np.arange(xstart, xstop, 0.01)
sorted_x = sorted(x)
xstart = sorted_x[0] - 0.5 * (sorted_x[1] - sorted_x[0])
xstop = sorted_x[-1] + 0.5 * (sorted_x[-1] - sorted_x[-2])
x_samples = np.arange(xstart, xstop + 0.01, 0.01)
plt.figure(figsize=(8, 8 / 1.618))
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)

View file

@ -302,11 +302,24 @@ def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
Parameters
----------
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
stop -- stops reading at given configuration number (default None)
alternative_ensemble_name -- Manually overwrite ensemble name
file_path : str
path to the bdio file
bdio_path : str
path to the shared bdio library libbdio.so (default ./libbdio.so)
start : int
The first configuration to be read (default 1)
stop : int
The last configuration to be read (default None)
step : int
Fixed step size between two measurements (default 1)
alternative_ensemble_name : str
Manually overwrite ensemble name
"""
start = kwargs.get('start', 1)
stop = kwargs.get('stop', None)
step = kwargs.get('step', 1)
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
@ -353,9 +366,9 @@ def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
prop_kappa = [] # Contains propagator kappas (Component of corr_kappa)
prop_source = [] # Contains propagator source positions
# Check noise type for multiple replica?
cnfg_no = -1
corr_no = -1
data = []
idl = []
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
@ -418,18 +431,20 @@ def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))
prop_source.append(_get_kwd(tmp_string, 'x0='))
if ruinfo == 4:
if 'stop' in kwargs:
if cnfg_no >= kwargs.get('stop') - 1:
cnfg_no = int(_get_kwd(tmp_string, 'CNFG_ID='))
if stop:
if cnfg_no > kwargs.get('stop'):
break
cnfg_no += 1
print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
if cnfg_no == 0:
idl.append(cnfg_no)
print('\r%s %i' % ('Reading configuration', cnfg_no), end='\r')
if len(idl) == 1:
no_corrs = len(corr_name)
data = []
for c in range(no_corrs):
data.append([])
corr_no = 0
bdio_close(fbdio)
print('\nEnsemble: ', ensemble_name)
@ -441,7 +456,7 @@ def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
print('Number of time values: ', d0)
print('Number of random sources: ', d1)
print('Number of corrs: ', len(corr_name))
print('Number of configurations: ', cnfg_no + 1)
print('Number of configurations: ', len(idl))
corr_kappa = [] # Contains kappa values for both propagators of given correlation function
corr_source = []
@ -452,12 +467,29 @@ def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
else:
corr_source.append(int(prop_source[int(item[0])]))
if stop is None:
stop = idl[-1]
idl_target = range(start, stop + 1, step)
if set(idl) != set(idl_target):
try:
indices = [idl.index(i) for i in idl_target]
except ValueError as err:
raise Exception('Configurations in file do no match target list!', err)
else:
indices = None
result = {}
for c in range(no_corrs):
tmp_corr = []
tmp_data = np.asarray(data[c])
for t in range(d0 - 2):
tmp_corr.append(Obs([np.asarray(data[c])[:, t]], [ensemble_name]))
result[(corr_name[c], corr_source[c]) + tuple(sorted(corr_kappa[c]))] = tmp_corr
if indices:
deltas = [tmp_data[:, t][index] for index in indices]
else:
deltas = tmp_data[:, t]
tmp_corr.append(Obs([deltas], [ensemble_name], idl=[idl_target]))
result[(corr_name[c], corr_source[c]) + tuple(corr_kappa[c])] = tmp_corr
# Check that all data entries have the same number of configurations
if len(set([o[0].N for o in list(result.values())])) != 1:
@ -480,10 +512,24 @@ def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
Parameters
----------
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
stop -- stops reading at given configuration number (default None)
file_path : str
path to the bdio file
bdio_path : str
path to the shared bdio library libbdio.so (default ./libbdio.so)
start : int
The first configuration to be read (default 1)
stop : int
The last configuration to be read (default None)
step : int
Fixed step size between two measurements (default 1)
alternative_ensemble_name : str
Manually overwrite ensemble name
"""
start = kwargs.get('start', 1)
stop = kwargs.get('stop', None)
step = kwargs.get('step', 1)
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
@ -529,9 +575,9 @@ def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
# d1 = 0 # nnoise
prop_kappa = [] # Contains propagator kappas (Component of corr_kappa)
# Check noise type for multiple replica?
cnfg_no = -1
corr_no = -1
data = []
idl = []
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
@ -586,12 +632,13 @@ def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
if ruinfo == 2:
prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))
if ruinfo == 4:
if 'stop' in kwargs:
if cnfg_no >= kwargs.get('stop') - 1:
cnfg_no = int(_get_kwd(tmp_string, 'CNFG_ID='))
if stop:
if cnfg_no > kwargs.get('stop'):
break
cnfg_no += 1
print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
if cnfg_no == 0:
idl.append(cnfg_no)
print('\r%s %i' % ('Reading configuration', cnfg_no), end='\r')
if len(idl) == 1:
no_corrs = len(corr_name)
data = []
for c in range(no_corrs):
@ -612,9 +659,18 @@ def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
for item in corr_props:
corr_kappa.append(float(prop_kappa[int(item)]))
if stop is None:
stop = idl[-1]
idl_target = range(start, stop + 1, step)
try:
indices = [idl.index(i) for i in idl_target]
except ValueError as err:
raise Exception('Configurations in file do no match target list!', err)
result = {}
for c in range(no_corrs):
result[(corr_name[c], str(corr_kappa[c]))] = Obs([np.asarray(data[c])], [ensemble_name])
deltas = [np.asarray(data[c])[index] for index in indices]
result[(corr_name[c], str(corr_kappa[c]))] = Obs([deltas], [ensemble_name], idl=[idl_target])
# Check that all data entries have the same number of configurations
if len(set([o.N for o in list(result.values())])) != 1:

View file

@ -358,7 +358,7 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
ret.is_merged = od['is_merged']
else:
ret = Obs([], [])
ret = Obs([], [], means=[])
ret._value = values[0]
for name in cd:
co = cd[name][0]
@ -383,7 +383,7 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
ret[-1].is_merged = od['is_merged']
else:
ret.append(Obs([], []))
ret.append(Obs([], [], means=[]))
ret[-1]._value = values[i]
print('Created Obs with means= ', values[i])
for name in cd:
@ -410,7 +410,7 @@ def _parse_json_dict(json_dict, verbose=True, full_output=False):
ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
ret[-1].is_merged = od['is_merged']
else:
ret.append(Obs([], []))
ret.append(Obs([], [], means=[]))
ret[-1]._value = values[i]
for name in cd:
co = cd[name][i]

View file

@ -90,8 +90,10 @@ class Obs:
self.deltas = {}
self._covobs = {}
self._value = 0
self.N = 0
self.is_merged = {}
self.idl = {}
if len(samples):
if idl is not None:
for name, idx in sorted(zip(names, idl)):
if isinstance(idx, range):
@ -110,8 +112,6 @@ class Obs:
for name, sample in sorted(zip(names, samples)):
self.idl[name] = range(1, len(sample) + 1)
self._value = 0
self.N = 0
if kwargs.get("means") is not None:
for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
self.shape[name] = len(self.idl[name])
@ -129,13 +129,6 @@ class Obs:
self._value += self.shape[name] * self.r_values[name]
self._value /= self.N
self.is_merged = {}
else:
self._value = 0
self.is_merged = {}
self.N = 0
self._dvalue = 0.0
self.ddvalue = 0.0
self.reweighted = False
@ -981,7 +974,7 @@ def _merge_idx(idl):
def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
"""Expand deltas defined on idx to the list of configs that is defined by new_idx.
New, empy entries are filled by 0. If idx and new_idx are of type range, the smallest
New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
common divisor of the step sizes is used as new step size.
Parameters
@ -1339,7 +1332,7 @@ def correlate(obs_a, obs_b):
return o
def covariance(obs, visualize=False, correlation=False, **kwargs):
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
r'''Calculates the covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
@ -1352,6 +1345,11 @@ def covariance(obs, visualize=False, correlation=False, **kwargs):
If True plots the corresponding normalized correlation matrix (default False).
correlation : bool
If True the correlation instead of the covariance is returned (default False).
smooth : None or int
If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
small ones.
Notes
-----
@ -1376,6 +1374,9 @@ def covariance(obs, visualize=False, correlation=False, **kwargs):
corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))
if isinstance(smooth, int):
corr = _smooth_eigenvalues(corr, smooth)
errors = [o.dvalue for o in obs]
cov = np.diag(errors) @ corr @ np.diag(errors)
@ -1395,34 +1396,29 @@ def covariance(obs, visualize=False, correlation=False, **kwargs):
return cov
def _smooth_eigenvalues(corr, E):
"""Eigenvalue smoothing as described in hep-lat/9412087
corr : np.ndarray
correlation matrix
E : integer
Number of eigenvalues to be left substantially unchanged
"""
if not (2 < E < corr.shape[0] - 1):
raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
vals, vec = np.linalg.eigh(corr)
lambda_min = np.mean(vals[:-E])
vals[vals < lambda_min] = lambda_min
vals /= np.mean(vals)
return vec @ np.diag(vals) @ vec.T
def _covariance_element(obs1, obs2):
"""Estimates the covariance of two Obs objects, neglecting autocorrelations."""
def expand_deltas(deltas, idx, shape, new_idx):
"""Expand deltas defined on idx to a contiguous range [new_idx[0], new_idx[-1]].
New, empy entries are filled by 0. If idx and new_idx are of type range, the smallest
common divisor of the step sizes is used as new step size.
Parameters
----------
deltas -- List of fluctuations
idx -- List or range of configs on which the deltas are defined.
Has to be a subset of new_idx.
shape -- Number of configs in idx.
new_idx -- List of configs that defines the new range.
"""
if type(idx) is range and type(new_idx) is range:
if idx == new_idx:
return deltas
ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
for i in range(shape):
ret[idx[i] - new_idx[0]] = deltas[i]
return ret
def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
deltas1 = expand_deltas(deltas1, idx1, len(idx1), new_idx)
deltas2 = expand_deltas(deltas2, idx2, len(idx2), new_idx)
deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx)
deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx)
return np.sum(deltas1 * deltas2)
if set(obs1.names).isdisjoint(set(obs2.names)):
@ -1490,7 +1486,8 @@ def import_jackknife(jacks, name, idl=None):
length = len(jacks) - 1
prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
samples = jacks[1:] @ prj
new_obs = Obs([samples], [name], idl=idl)
mean = np.mean(samples)
new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
new_obs._value = jacks[0]
return new_obs
@ -1549,7 +1546,7 @@ def cov_Obs(means, cov, name, grad=None):
co : Covobs
Covobs to be embedded into the Obs
"""
o = Obs([], [])
o = Obs([], [], means=[])
o._value = co.value
o.names.append(co.name)
o._covobs[co.name] = co

View file

@ -1 +1 @@
__version__ = "2.0.0-rc.3+dev"
__version__ = "2.1.0+dev"

View file

@ -3,7 +3,7 @@
from setuptools import setup, find_packages
setup(name='pyerrors',
version='2.0.0-rc.3+dev',
version='2.1.0+dev',
description='Error analysis for lattice QCD',
author='Fabian Joswig',
author_email='fabian.joswig@ed.ac.uk',

View file

@ -157,6 +157,28 @@ def test_correlated_fit():
assert(diff.is_zero_within_error(sigma=5))
def test_fit_corr_independent():
dim = 50
x = np.arange(dim)
y = 0.84 * np.exp(-0.12 * x) + np.random.normal(0.0, 0.1, dim)
yerr = [0.1] * dim
oy = []
for i, item in enumerate(x):
oy.append(pe.pseudo_Obs(y[i], yerr[i], str(i)))
def func(a, x):
y = a[0] * anp.exp(-a[1] * x)
return y
out = pe.least_squares(x, oy, func)
out_corr = pe.least_squares(x, oy, func, correlated_fit=True)
assert np.isclose(out.chisquare, out_corr.chisquare)
assert (out[0] - out_corr[0]).is_zero(atol=1e-5)
assert (out[1] - out_corr[1]).is_zero(atol=1e-5)
def test_total_least_squares():
dim = 10 + int(30 * np.random.rand())
x = np.arange(dim) + np.random.normal(0.0, 0.15, dim)

View file

@ -510,6 +510,10 @@ def test_correlate():
pe.correlate(r_obs, r_obs)
def test_merge_idx():
assert pe.obs._merge_idx([range(10, 1010, 10), range(10, 1010, 50)]) == range(10, 1010, 10)
assert pe.obs._merge_idx([range(500, 6050, 50), range(500, 6250, 250)]) == range(500, 6250, 50)
def test_irregular_error_propagation():
obs_list = [pe.Obs([np.random.rand(100)], ['t']),
@ -713,10 +717,21 @@ def test_covariance_rank_deficient():
with pytest.warns(RuntimeWarning):
pe.covariance(obs)
def test_covariance_idl():
range1 = range(10, 1010, 10)
range2 = range(10, 1010, 50)
obs1 = pe.Obs([np.random.normal(1.0, 0.1, len(range1))], ["ens"], idl=[range1])
obs2 = pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
obs1.gamma_method()
obs2.gamma_method()
pe.covariance([obs1, obs2])
def test_empty_obs():
o = pe.Obs([np.random.rand(100)], ['test'])
q = o + pe.Obs([], [])
q = o + pe.Obs([], [], means=[])
assert q == o
@ -769,6 +784,7 @@ def test_merge_idx():
for i in range(1, len(new_idx)):
assert(new_idx[i - 1] < new_idx[i])
def test_cobs_array():
cobs = pe.Obs([np.random.normal(1.0, 0.1, 100)], ['t']) * (1 + 2j)
np.identity(4) + cobs
@ -779,4 +795,3 @@ def test_cobs_array():
cobs * np.identity(4)
np.identity(4) / cobs
cobs / np.ones((4, 4))