Merge branch 'develop' into feature/covobs

Simon Kuberski 2021-12-01 15:15:20 +01:00
commit 35c1a0e1bd
8 changed files with 509 additions and 60 deletions

CHANGELOG.md

@@ -4,20 +4,33 @@ All notable changes to this project will be documented in this file.
## [2.0.0] - 2021-??-??
### Added
- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation.
- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int`.
- The possibility to work with Monte Carlo histories which are evenly or unevenly spaced was added.
- The Corr class now has additional methods like `reverse`, `T_symmetry`, `correlate` and `reweight`.
- `linalg` module now has explicit functions `inv` and `cholesky`.
- `Obs` objects now have methods `is_zero` and `is_zero_within_error` as well as overloaded comparison operations.
- Functions to convert `Obs` data to or from jackknife samples were added (see the sketch after this list).
- Alternative matrix multiplication routine `jack_matmul` was added to `linalg` module which makes use of the jackknife approximation and is much faster for large matrices.
- Additional input routines for npr data were added to `input.hadrons`.
- Version number added.
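
A minimal sketch of a few of these additions, using hypothetical test observables created with `pseudo_Obs` (the `import_jackknife` helper is assumed to live in `pyerrors.obs`):
```python
import pyerrors as pe

# two hypothetical test observables on a fictitious ensemble 'ens1'
obs_a = pe.pseudo_Obs(2.3, 0.2, 'ens1')
obs_b = pe.pseudo_Obs(2.3, 0.2, 'ens1')

print((obs_a - obs_a).is_zero())    # fluctuations cancel exactly: True

diff = obs_a - obs_b
diff.gamma_method()                 # error estimate needed for the check below
print(diff.is_zero_within_error())  # central value compatible with zero

# round trip through the new jackknife interface
jacks = obs_a.export_jackknife()
reconstructed = pe.obs.import_jackknife(jacks, 'ens1')
```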
### Changed
- Additional attributes can no longer be added to existing `Obs`. This makes it no longer possible to import `Obs` created with previous versions of pyerrors.
- The internal bookkeeping system for ensembles/replica was changed. The separator for replica is now `|`.
- The fit functions were renamed to `least_squares` and `total_least_squares`.
- The fit functions can now deal with provided covariance matrices.
- The convention for the fit range in the Corr class has been changed.
- `Obs.print` was renamed to `Obs.details` and the output was improved.
- The default value for `Corr.prange` is now `None`.
- The `input` module was restructured to contain one submodule per data source.
- Performance of `Obs.__init__` improved.
### Deprecated
- The function `plot_corrs` was deprecated as all its functionality is now contained within `Corr.show`.
- The kwarg `bias_correction` in `derived_observable` was removed.
- `Obs` objects no longer have an attribute `e_Q`.
- Removed `fits.fit_exp`.
- Removed the jackknife module.
## [1.1.0] - 2021-10-11
### Added
@@ -77,7 +90,7 @@ All notable changes to this project will be documented in this file.
## [0.7.0] - 2020-03-10
### Added
- New fit functions for fitting with and without x-errors added which use automatic differentiation and should be more reliable than the old ones.
- Fitting with Bayesian priors added.
- New functions for visualization of fits which can be activated via the kwargs resplot and qqplot.
- chisquare/expected_chisquare, which takes into account correlations in the data and non-linearities in the fit function, can now be activated with the kwarg expected_chisquare.

README.md

@@ -3,7 +3,7 @@
`pyerrors` is a python package for error computation and propagation of Markov chain Monte Carlo data.
- **Documentation:** https://fjosw.github.io/pyerrors/pyerrors.html
- **Examples**: https://github.com/fjosw/pyerrors/tree/develop/examples (Do not work properly at the moment)
- **Contributing:** https://github.com/fjosw/pyerrors/blob/develop/CONTRIBUTING.md
- **Bug reports:** https://github.com/fjosw/pyerrors/issues
@@ -16,9 +16,3 @@ to install the most recent release run
```bash
pip install git+https://github.com/fjosw/pyerrors.git@master
```
## Other implementations
There exist similar publicly available implementations of gamma method error analysis suites in
- [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors)
- [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl)
- [Python](https://github.com/mbruno46/pyobs)

pyerrors/__init__.py

@@ -8,6 +8,11 @@ It is based on the **gamma method** [arXiv:hep-lat/0306017](https://arxiv.org/abs/hep-lat/0306017).
- **non-linear fits with x- and y-errors** and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289)
- **real and complex matrix operations** and their error propagation based on automatic differentiation (Cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...)
There exist similar publicly available implementations of gamma method error analysis suites in
- [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors)
- [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl)
- [Python](https://github.com/mbruno46/pyobs)
## Basic example
```python

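# The diff hunk is truncated at this point; below is a minimal sketch of such
# a basic example (names are assumptions): `samples` stands for a 1d array of
# Monte Carlo measurements on a single ensemble.
import numpy as np
import pyerrors as pe

my_obs = pe.Obs([samples], ['ensemble_name'])
my_new_obs = 2 * np.log(my_obs) / my_obs
my_new_obs.gamma_method()   # run the gamma-method error analysis
my_new_obs.details()        # print value, error and autocorrelation information
```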
pyerrors/input/__init__.py

@@ -1,5 +1,6 @@
from . import bdio
from . import hadrons
from . import json
from . import misc
from . import openQCD
from . import sfcf

pyerrors/input/hadrons.py

@@ -224,7 +224,7 @@ def read_Bilinear_hd5(path, filestem, ens_id, idl=None):
    return result_dict

def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]):
    """Read hadrons FourquarkFullyConnected hdf5 file and output an array of CObs

    Parameters
@@ -237,6 +237,8 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None):
        name of the ensemble, required for internal bookkeeping
    idl : range
        If specified only configurations in the given range are read in.
    vertices : list
        Vertex functions to be extracted.
    """

    files, idx = _get_files(path, filestem, idl)
@@ -244,6 +246,10 @@
    mom_in = None
    mom_out = None

    vertex_names = []
    for vertex in vertices:
        vertex_names += _get_lorentz_names(vertex)

    corr_data = {}

    tree = 'FourQuarkFullyConnected/FourQuarkFullyConnected_'
@@ -251,8 +257,9 @@
    for hd5_file in files:
        file = h5py.File(path + '/' + hd5_file, "r")
        for i in range(32):
            name = (file[tree + str(i) + '/info'].attrs['gammaA'][0].decode('UTF-8'), file[tree + str(i) + '/info'].attrs['gammaB'][0].decode('UTF-8'))
            if name in vertex_names:
                if name not in corr_data:
                    corr_data[name] = []
                raw_data = file[tree + str(i) + '/corr'][0][0].view('complex')
@@ -264,12 +271,21 @@
        file.close()

    intermediate_dict = {}
    for vertex in vertices:
        lorentz_names = _get_lorentz_names(vertex)
        for v_name in lorentz_names:
            if vertex not in intermediate_dict:
                intermediate_dict[vertex] = np.array(corr_data[v_name])
            else:
                intermediate_dict[vertex] += np.array(corr_data[v_name])

    result_dict = {}
    for key, data in intermediate_dict.items():
        rolled_array = np.moveaxis(data, 0, 8)

        matrix = np.empty((rolled_array.shape[:-1]), dtype=object)
        for index in np.ndindex(rolled_array.shape[:-1]):
@@ -278,6 +294,38 @@
            matrix[index] = CObs(real, imag)

        result_dict[key] = Npr_matrix(matrix, mom_in=mom_in, mom_out=mom_out)
        # result_dict[key] = Npr_matrix(matrix.swapaxes(1, 2).reshape((12, 12), order='F'), mom_in=mom_in, mom_out=mom_out)

    return result_dict
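
# A hypothetical call (path, filestem and ensemble id are made up), assuming
# hdf5 data written by the hadrons FourquarkFullyConnected module:
#
#     fourquark = read_Fourquark_hd5('./data', 'Fourquark', 'ensA', vertices=['VA', 'AV'])
#     VA_matrix = fourquark['VA']   # Npr_matrix of CObs, summed over the Lorentz index
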
def _get_lorentz_names(name):
    assert len(name) == 2

    res = []

    if not set(name) <= set(['S', 'P', 'V', 'A', 'T']):
        raise Exception("Name can only contain 'S', 'P', 'V', 'A' or 'T'")

    if 'S' in name or 'P' in name:
        if not set(name) <= set(['S', 'P']):
            raise Exception("'" + name + "' is not a Lorentz scalar")

        g_names = {'S': 'Identity',
                   'P': 'Gamma5'}

        res.append((g_names[name[0]], g_names[name[1]]))

    elif 'T' in name:
        if not set(name) <= set(['T']):
            raise Exception("'" + name + "' is not a Lorentz scalar")

        raise Exception("Tensor operators not yet implemented.")

    else:
        if not set(name) <= set(['V', 'A']):
            raise Exception("'" + name + "' is not a Lorentz scalar")

        lorentz_index = ['X', 'Y', 'Z', 'T']

        for ind in lorentz_index:
            res.append(('Gamma' + ind + (name[0] == 'A') * 'Gamma5',
                        'Gamma' + ind + (name[1] == 'A') * 'Gamma5'))

    return res
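
# For illustration, a sketch of the expected expansion (not part of the file):
#
#     _get_lorentz_names('VA')
#     # [('GammaX', 'GammaXGamma5'), ('GammaY', 'GammaYGamma5'),
#     #  ('GammaZ', 'GammaZGamma5'), ('GammaT', 'GammaTGamma5')]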

pyerrors/input/json.py Normal file

@@ -0,0 +1,327 @@
import json
import gzip
from ..obs import Obs
import getpass
import socket
import datetime
from .. import version as pyerrorsversion
import platform
import numpy as np
import warnings

def create_json_string(ol, fname, description='', indent=1):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .json(.gz) file

    Parameters
    -----------------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, np.ndarray
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file
    description : str
        Optional string that describes the contents of the json file
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    """

    def _default(self, obj):
        return str(obj)
    my_encoder = json.JSONEncoder
    _default.default = json.JSONEncoder().default
    my_encoder.default = _default

    class deltalist:
        def __init__(self, li):
            self.cnfg = li[0]
            self.deltas = li[1:]

        def __repr__(self):
            s = '[%d' % (self.cnfg)
            for d in self.deltas:
                s += ', %1.15e' % (d)
            s += ']'
            return s

        def __str__(self):
            return self.__repr__()

    def _gen_data_d_from_list(ol):
        dl = []
        for name in ol[0].e_names:
            ed = {}
            ed['id'] = name
            ed['replica'] = []
            for r_name in ol[0].e_content[name]:
                rd = {}
                rd['name'] = r_name
                if ol[0].is_merged.get(r_name, False):
                    rd['is_merged'] = True
                rd['deltas'] = []
                for i in range(len(ol[0].idl[r_name])):
                    rd['deltas'].append([ol[0].idl[r_name][i]])
                    for o in ol:
                        rd['deltas'][-1].append(o.deltas[r_name][i])
                    rd['deltas'][-1] = deltalist(rd['deltas'][-1])
                ed['replica'].append(rd)
            dl.append(ed)
        return dl

    def _assert_equal_properties(ol, otype=Obs):
        for o in ol:
            if not isinstance(o, otype):
                raise Exception('Wrong data type in list!')
        for o in ol[1:]:
            if not ol[0].is_merged == o.is_merged:
                raise Exception('All Obs in list have to be defined on the same set of configs!')
            if not ol[0].reweighted == o.reweighted:
                raise Exception('All Obs in list have to have the same property .reweighted!')
            if not ol[0].e_content == o.e_content:
                raise Exception('All Obs in list have to be defined on the same set of configs!')
            # more stringent tests --> compare idl?

    def write_Obs_to_dict(o):
        d = {}
        d['type'] = 'Obs'
        d['layout'] = '1'
        if o.tag:
            d['tag'] = o.tag
        if o.reweighted:
            d['reweighted'] = o.reweighted
        d['value'] = [o.value]
        d['data'] = _gen_data_d_from_list([o])
        return d

    def write_List_to_dict(ol):
        _assert_equal_properties(ol)
        d = {}
        d['type'] = 'List'
        d['layout'] = '%d' % len(ol)
        if ol[0].tag:
            d['tag'] = ol[0].tag
            if isinstance(ol[0].tag, str):
                if len(set([o.tag for o in ol])) > 1:
                    d['tag'] = ''
                    for o in ol:
                        d['tag'] += '%s\n' % (o.tag)
        if ol[0].reweighted:
            d['reweighted'] = ol[0].reweighted
        d['value'] = [o.value for o in ol]
        d['data'] = _gen_data_d_from_list(ol)
        return d

    def write_Array_to_dict(oa):
        ol = np.ravel(oa)
        _assert_equal_properties(ol)
        d = {}
        d['type'] = 'Array'
        d['layout'] = str(oa.shape).lstrip('(').rstrip(')').rstrip(',')
        if ol[0].tag:
            d['tag'] = ol[0].tag
            if isinstance(ol[0].tag, str):
                if len(set([o.tag for o in ol])) > 1:
                    d['tag'] = ''
                    for o in ol:
                        d['tag'] += '%s\n' % (o.tag)
        if ol[0].reweighted:
            d['reweighted'] = ol[0].reweighted
        d['value'] = [o.value for o in ol]
        d['data'] = _gen_data_d_from_list(ol)
        return d

    if not isinstance(ol, list):
        ol = [ol]

    d = {}
    d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__)
    d['version'] = '0.1'
    d['who'] = getpass.getuser()
    d['date'] = str(datetime.datetime.now())[:-7]
    d['host'] = socket.gethostname() + ', ' + platform.platform()

    if description:
        d['description'] = description
    d['obsdata'] = []
    for io in ol:
        if isinstance(io, Obs):
            d['obsdata'].append(write_Obs_to_dict(io))
        elif isinstance(io, list):
            d['obsdata'].append(write_List_to_dict(io))
        elif isinstance(io, np.ndarray):
            d['obsdata'].append(write_Array_to_dict(io))

    jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False)
    # workaround for un-indentation of delta lists
    jsonstring = jsonstring.replace(' "[', ' [').replace(']",', '],').replace(']"\n', ']\n')
    return jsonstring
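
# For orientation, a sketch of the resulting JSON document (keys as written by
# the code above; values abbreviated and made up):
#
#     {
#         "program": "pyerrors 2.0.0",
#         "version": "0.1",
#         "who": "...", "date": "...", "host": "...",
#         "description": "...",
#         "obsdata": [{"type": "Obs", "layout": "1", "value": [1.7],
#                      "data": [{"id": "ens1", "replica": [
#                          {"name": "ens1|r1",
#                           "deltas": [[1, 0.0321], [2, -0.0115]]}]}]}]
#     }
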
def dump_to_json(ol, fname, description='', indent=1, gz=True):
    """Export a list of Obs or structures containing Obs to a .json(.gz) file

    Parameters
    -----------------
    ol : list
        List of objects that will be exported. At the moment, these objects can be
        either of: Obs, list, np.ndarray
        All Obs inside a structure have to be defined on the same set of configurations.
    fname : str
        Filename of the output file
    description : str
        Optional string that describes the contents of the json file
    indent : int
        Specify the indentation level of the json file. None or 0 is permissible and
        saves disk space.
    gz : bool
        If True, the output is a gzipped json. If False, the output is a json file.
    """

    jsonstring = create_json_string(ol, fname, description, indent)

    if not fname.endswith('.json') and not fname.endswith('.gz'):
        fname += '.json'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        fp = gzip.open(fname, 'wb')
        fp.write(jsonstring.encode('utf-8'))
    else:
        fp = open(fname, 'w', encoding='utf-8')
        fp.write(jsonstring)
    fp.close()
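
# A hypothetical round trip (cf. tests/io_test.py added in this commit):
#
#     import pyerrors as pe
#     import pyerrors.input.json as jsonio
#
#     obs = pe.pseudo_Obs(0.5, 0.1, 'ensemble')
#     jsonio.dump_to_json([obs], 'output')   # writes output.json.gz
#     reread = jsonio.load_json('output')    # single element: unpacked Obs
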
def load_json(fname, verbose=True, gz=True, full_output=False):
    """Import a list of Obs or structures containing Obs from a .json.gz file.
    The following structures are supported: Obs, list, np.ndarray
    If the list contains only one element, it is unpacked from the list.

    Parameters
    -----------------
    fname : str
        Filename of the input file
    verbose : bool
        Print additional information that was written to the file.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes JSON file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned.
    """

    def _gen_obsd_from_datad(d):
        retd = {}
        retd['names'] = []
        retd['idl'] = []
        retd['deltas'] = []
        retd['is_merged'] = {}
        for ens in d:
            for rep in ens['replica']:
                retd['names'].append(rep['name'])
                retd['idl'].append([di[0] for di in rep['deltas']])
                retd['deltas'].append(np.array([di[1:] for di in rep['deltas']]))
                retd['is_merged'][rep['name']] = rep.get('is_merged', False)
        return retd

    def get_Obs_from_dict(o):
        layouts = o.get('layout', '1').strip()
        if layouts != '1':
            raise Exception("layout is %s, has to be 1 for type Obs." % (layouts))

        values = o['value']
        od = _gen_obsd_from_datad(o['data'])

        ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl'])
        ret.reweighted = o.get('reweighted', False)
        ret.is_merged = od['is_merged']
        ret.tag = o.get('tag', None)
        return ret

    def get_List_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = int(layouts)
        values = o['value']
        od = _gen_obsd_from_datad(o['data'])

        ret = []
        for i in range(layout):
            ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl']))
            ret[-1].reweighted = o.get('reweighted', False)
            ret[-1].is_merged = od['is_merged']
            ret[-1].tag = o.get('tag', None)
        return ret

    def get_Array_from_dict(o):
        layouts = o.get('layout', '1').strip()
        layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0]
        values = o['value']
        od = _gen_obsd_from_datad(o['data'])

        ret = []
        for i in range(np.prod(layout)):
            ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl']))
            ret[-1].reweighted = o.get('reweighted', False)
            ret[-1].is_merged = od['is_merged']
            ret[-1].tag = o.get('tag', None)
        return np.reshape(ret, layout)

    if not fname.endswith('.json') and not fname.endswith('.gz'):
        fname += '.json'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            d = json.loads(fin.read().decode('utf-8'))
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'r', encoding='utf-8') as fin:
            d = json.loads(fin.read())

    prog = d.get('program', '')
    version = d.get('version', '')
    who = d.get('who', '')
    date = d.get('date', '')
    host = d.get('host', '')

    if prog and verbose:
        print('Data has been written using %s.' % (prog))
    if version and verbose:
        print('Format version %s' % (version))
    if np.any([who, date, host]) and verbose:
        print('Written by %s on %s on host %s' % (who, date, host))
    description = d.get('description', '')
    if description and verbose:
        print()
        print('Description: ', description)
    obsdata = d['obsdata']
    ol = []
    for io in obsdata:
        if io['type'] == 'Obs':
            ol.append(get_Obs_from_dict(io))
        elif io['type'] == 'List':
            ol.append(get_List_from_dict(io))
        elif io['type'] == 'Array':
            ol.append(get_Array_from_dict(io))

    if full_output:
        retd = {}
        retd['program'] = prog
        retd['version'] = version
        retd['who'] = who
        retd['date'] = date
        retd['host'] = host
        retd['description'] = description
        retd['obsdata'] = ol
        return retd
    else:
        if len(obsdata) == 1:
            ol = ol[0]
        return ol

pyerrors/linalg.py

@@ -174,6 +174,35 @@ def matmul(*operands):
    return derived_array(multi_dot, operands)


def _exp_to_jack(matrix):
    base_matrix = np.empty_like(matrix)
    for index, entry in np.ndenumerate(matrix):
        base_matrix[index] = entry.export_jackknife()
    return base_matrix


def _imp_from_jack(matrix, name, idl):
    base_matrix = np.empty_like(matrix)
    for index, entry in np.ndenumerate(matrix):
        base_matrix[index] = import_jackknife(entry, name, [idl])
    return base_matrix


def _exp_to_jack_c(matrix):
    base_matrix = np.empty_like(matrix)
    for index, entry in np.ndenumerate(matrix):
        base_matrix[index] = entry.real.export_jackknife() + 1j * entry.imag.export_jackknife()
    return base_matrix


def _imp_from_jack_c(matrix, name, idl):
    base_matrix = np.empty_like(matrix)
    for index, entry in np.ndenumerate(matrix):
        base_matrix[index] = CObs(import_jackknife(entry.real, name, [idl]),
                                  import_jackknife(entry.imag, name, [idl]))
    return base_matrix
def jack_matmul(*operands):
    """Matrix multiply both operands making use of the jackknife approximation.
@@ -190,49 +219,24 @@ def jack_matmul(*operands):
        name = operands[0].flat[0].real.names[0]
        idl = operands[0].flat[0].real.idl[name]

        r = _exp_to_jack_c(operands[0])
        for op in operands[1:]:
            if isinstance(op.flat[0], CObs):
                r = r @ _exp_to_jack_c(op)
            else:
                r = r @ op

        return _imp_from_jack_c(r, name, idl)
    else:
        name = operands[0].flat[0].names[0]
        idl = operands[0].flat[0].idl[name]

        r = _exp_to_jack(operands[0])
        for op in operands[1:]:
            if isinstance(op.flat[0], Obs):
                r = r @ _exp_to_jack(op)
            else:
                r = r @ op

        return _imp_from_jack(r, name, idl)
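
# Sketch of the intended use, for hypothetical Obs-valued matrices on a
# common fictitious ensemble 'ens' (cf. the test suite):
#
#     import numpy as np
#     import pyerrors as pe
#
#     mat = np.array([[pe.pseudo_Obs(1.0, 0.1, 'ens'), pe.pseudo_Obs(0.3, 0.1, 'ens')],
#                     [pe.pseudo_Obs(0.2, 0.1, 'ens'), pe.pseudo_Obs(2.0, 0.4, 'ens')]])
#
#     exact = pe.linalg.matmul(mat, mat)        # full error propagation
#     approx = pe.linalg.jack_matmul(mat, mat)  # jackknife approximation, faster for large matrices
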
def inv(x):

tests/io_test.py Normal file

@@ -0,0 +1,57 @@
import pyerrors.obs as pe
import pyerrors.input.json as jsonio
import numpy as np
import os

def test_jsonio():
    o = pe.pseudo_Obs(1.0, .2, 'one')
    o2 = pe.pseudo_Obs(0.5, .1, 'two|r1')
    o3 = pe.pseudo_Obs(0.5, .1, 'two|r2')
    o4 = pe.merge_obs([o2, o3])
    otag = 'This has been merged!'
    o4.tag = otag
    do = o - .2 * o4

    o5 = pe.pseudo_Obs(0.8, .1, 'two|r2')
    testl = [o3, o5]

    arr = np.array([o3, o5])
    mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]])

    tt1 = pe.Obs([np.random.rand(100)], ['t|r1'], idl=[range(2, 202, 2)])
    tt2 = pe.Obs([np.random.rand(100)], ['t|r2'], idl=[range(2, 202, 2)])
    tt3 = pe.Obs([np.random.rand(102)], ['qe'])
    tt = tt1 + tt2 + tt3
    tt.tag = 'Test Obs'

    ol = [o4, do, testl, mat, arr, np.array([o]), np.array([tt, tt]), [tt, tt]]
    fname = 'test_rw'

    jsonio.dump_to_json(ol, fname, indent=1)
    rl = jsonio.load_json(fname)

    os.remove(fname + '.json.gz')

    for i in range(len(rl)):
        if isinstance(ol[i], pe.Obs):
            o = ol[i] - rl[i]
            assert(o.is_zero())
            assert(ol[i].tag == rl[i].tag)
        or1 = np.ravel(ol[i])
        or2 = np.ravel(rl[i])
        for j in range(len(or1)):
            o = or1[j] - or2[j]
            assert(o.is_zero())

    description = {'I': {'Am': {'a': 'nested dictionary!'}}}
    jsonio.dump_to_json(ol, fname, indent=1, gz=False, description=description)
    rl = jsonio.load_json(fname, gz=False, full_output=True)
    assert(description == rl['description'])

    os.remove(fname + '.json')