From 63dd627e2023a677b802090743c8ff98a34db05e Mon Sep 17 00:00:00 2001
From: Simon Kuberski
Date: Mon, 29 Nov 2021 15:27:28 +0100
Subject: [PATCH 01/11] First implementation of a json I/O

---
 pyerrors/input/json.py | 265 +++++++++++++++++++++++++++++++++++++++++
 tests/io_test.py       |  36 ++++++
 2 files changed, 301 insertions(+)
 create mode 100644 pyerrors/input/json.py
 create mode 100644 tests/io_test.py

diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py
new file mode 100644
index 00000000..f4229a20
--- /dev/null
+++ b/pyerrors/input/json.py
@@ -0,0 +1,265 @@
+import json
+import gzip
+from ..obs import Obs
+import getpass
+import socket
+import datetime
+from .. import version as pyerrorsversion
+import platform
+import numpy as np
+
+
+def dump_to_json(ol, fname, description='', indent=4):
+    """Export a list of Obs or structures containing Obs to a .json.gz file
+
+    Parameters
+    -----------------
+    ol : list
+        List of objects that will be exported. At the moment, these objects can be
+        either of: Obs, list, np.ndarray
+        All Obs inside a structure have to be defined on the same set of configurations.
+    fname : str
+        Filename of the output file
+    description : str
+        Optional string that describes the contents of the json file
+    indent : int
+        Specify the indentation level of the json file. None or 0 is permissible and
+        saves disk space.
+    """
+
+    def _default(self, obj):
+        return str(obj)
+    my_encoder = json.JSONEncoder
+    _default.default = json.JSONEncoder().default
+    my_encoder.default = _default
+
+    class deltalist:
+        def __init__(self, li):
+            self.cnfg = li[0]
+            self.deltas = li[1:]
+
+        def __repr__(self):
+            s = '[%d' % (self.cnfg)
+            for d in self.deltas:
+                s += ', %1.15e' % (d)
+            s += ']'
+            return s
+
+        def __str__(self):
+            return self.__repr__()
+
+    def _gen_data_d_from_list(ol):
+        dl = []
+        for name in ol[0].e_names:
+            ed = {}
+            ed['id'] = name
+            ed['replica'] = []
+            for r_name in ol[0].e_content[name]:
+                rd = {}
+                rd['name'] = r_name
+                if ol[0].is_merged.get(r_name, False):
+                    rd['is_merged'] = True
+                rd['deltas'] = []
+                for i in range(len(ol[0].idl[r_name])):
+                    rd['deltas'].append([ol[0].idl[r_name][i]])
+                    for o in ol:
+                        rd['deltas'][-1].append(o.deltas[r_name][i])
+                    rd['deltas'][-1] = deltalist(rd['deltas'][-1])
+                ed['replica'].append(rd)
+            dl.append(ed)
+        return dl
+
+    def _assert_equal_properties(ol, otype=Obs):
+        for o in ol:
+            if not isinstance(o, otype):
+                raise Exception('Wrong data type in list!')
+        for o in ol[1:]:
+            if not ol[0].is_merged == o.is_merged:
+                raise Exception('All Obs in list have to be defined on the same set of configs!')
+            if not ol[0].reweighted == o.reweighted:
+                raise Exception('All Obs in list have to have the same property .reweighted!')
+            if not ol[0].e_content == o.e_content:
+                raise Exception('All Obs in list have to be defined on the same set of configs!')
+            # more stringent tests --> compare idl?
+ + def write_Obs_to_dict(o): + d = {} + d['type'] = 'Obs' + d['layout'] = '1' + d['tag'] = o.tag + if o.reweighted: + d['reweighted'] = o.reweighted + d['value'] = [o.value] + d['data'] = _gen_data_d_from_list([o]) + return d + + def write_List_to_dict(ol): + _assert_equal_properties(ol) + d = {} + d['type'] = 'List' + d['layout'] = '%d' % len(ol) + if len(set([o.tag for o in ol])) > 1: + d['tag'] = '' + for o in ol: + d['tag'] += '%s\n' % (o.tag) + else: + d['tag'] = ol[0].tag + if ol[0].reweighted: + d['reweighted'] = ol[0].reweighted + d['value'] = [o.value for o in ol] + d['data'] = _gen_data_d_from_list(ol) + + return d + + def write_Array_to_dict(oa): + ol = np.ravel(oa) + _assert_equal_properties(ol) + d = {} + d['type'] = 'Array' + d['layout'] = str(oa.shape).lstrip('(').rstrip(')') + if len(set([o.tag for o in ol])) > 1: + d['tag'] = '' + for o in ol: + d['tag'] += '%s\n' % (o.tag) + else: + d['tag'] = ol[0].tag + if ol[0].reweighted: + d['reweighted'] = ol[0].reweighted + d['value'] = [o.value for o in ol] + d['data'] = _gen_data_d_from_list(ol) + return d + if not isinstance(ol, list): + ol = [ol] + d = {} + d['program'] = 'pyerrors %s' % (pyerrorsversion.__version__) + d['version'] = '0.1' + d['who'] = getpass.getuser() + d['date'] = str(datetime.datetime.now())[:-7] + d['host'] = socket.gethostname() + ', ' + platform.platform() + + if description: + d['description'] = description + d['obsdata'] = [] + for io in ol: + if isinstance(io, Obs): + d['obsdata'].append(write_Obs_to_dict(io)) + elif isinstance(io, list): + d['obsdata'].append(write_List_to_dict(io)) + elif isinstance(io, np.ndarray): + d['obsdata'].append(write_Array_to_dict(io)) + if not fname.endswith('.json') and not fname.endswith('.gz'): + fname += '.json' + if not fname.endswith('.gz'): + fname += '.gz' + jsonstring = json.dumps(d, indent=indent, cls=my_encoder) + # workaround for un-indentation of delta lists + jsonstring = jsonstring.replace('"[', '[').replace(']"', ']') + fp = gzip.open(fname, 'wb') + fp.write(jsonstring.encode('utf-8')) + fp.close() + + # this would be nicer, since it does not need a string + # with gzip.open(fname, 'wt', encoding='UTF-8') as zipfile: + # json.dump(d, zipfile, indent=indent) + + +def load_json(fname, verbose=True): + """Import a list of Obs or structures containing Obs to a .json.gz file. + The following structures are supported: Obs, list, np.ndarray + + Parameters + ----------------- + fname : str + Filename of the input file + verbose : bool + Print additional information that was written to the file. + """ + + def _gen_obsd_from_datad(d): + retd = {} + retd['names'] = [] + retd['idl'] = [] + retd['deltas'] = [] + retd['is_merged'] = {} + for ens in d: + for rep in ens['replica']: + retd['names'].append(rep['name']) + retd['idl'].append([di[0] for di in rep['deltas']]) + retd['deltas'].append([di[1:] for di in rep['deltas']]) + retd['is_merged'][rep['name']] = rep.get('is_merged', False) + retd['deltas'] = np.array(retd['deltas']) + return retd + + def get_Obs_from_dict(o): + layouts = o.get('layout', '1').strip() + if layouts != '1': + raise Exception("layout is %s has to be 1 for type Obs." 
% (layouts), RuntimeWarning) + + values = o['value'] + od = _gen_obsd_from_datad(o['data']) + + ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl']) + ret.reweighted = o.get('reweighted', False) + ret.is_merged = od['is_merged'] + ret.tag = o.get('tag', '') + return ret + + def get_List_from_dict(o): + layouts = o.get('layout', '1').strip() + layout = int(layouts) + values = o['value'] + od = _gen_obsd_from_datad(o['data']) + + ret = [] + for i in range(layout): + ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])) + ret[-1].reweighted = o.get('reweighted', False) + ret[-1].is_merged = od['is_merged'] + ret[-1].tag = o.get('tag', '') + return ret + + def get_Array_from_dict(o): + layouts = o.get('layout', '1').strip() + layout = [int(ls.strip()) for ls in layouts.split(',')] + values = o['value'] + od = _gen_obsd_from_datad(o['data']) + + ret = [] + for i in range(np.prod(layout)): + ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])) + ret[-1].reweighted = o.get('reweighted', False) + ret[-1].is_merged = od['is_merged'] + ret[-1].tag = o.get('tag', '') + return np.reshape(ret, layout) + + if not fname.endswith('.json') and not fname.endswith('.gz'): + fname += '.json' + if not fname.endswith('.gz'): + fname += '.gz' + with gzip.open(fname, 'r') as fin: + d = json.loads(fin.read().decode('utf-8')) + prog = d.get('program', '') + version = d.get('version', '') + who = d.get('who', '') + date = d.get('date', '') + host = d.get('host', '') + if prog and verbose: + print('Data has been written using %s.' % (prog)) + if version and verbose: + print('Format version %s' % (version)) + if np.any([who, date, host] and verbose): + print('Written by %s on %s on host %s' % (who, date, host)) + description = d.get('description', '') + if description and verbose: + print() + print(description) + obsdata = d['obsdata'] + ol = [] + for io in obsdata: + if io['type'] == 'Obs': + ol.append(get_Obs_from_dict(io)) + elif io['type'] == 'List': + ol.append(get_List_from_dict(io)) + elif io['type'] == 'Array': + ol.append(get_Array_from_dict(io)) + return ol diff --git a/tests/io_test.py b/tests/io_test.py new file mode 100644 index 00000000..c2dcdcbb --- /dev/null +++ b/tests/io_test.py @@ -0,0 +1,36 @@ +import pyerrors.obs as pe +import pyerrors.input.json as jsonio +import numpy as np +import os + + +def test_jsonio(): + o = pe.pseudo_Obs(1.0, .2, 'one') + o2 = pe.pseudo_Obs(0.5, .1, 'two|r1') + o3 = pe.pseudo_Obs(0.5, .1, 'two|r2') + o4 = pe.merge_obs([o2, o3]) + do = o - .2 * o4 + + o5 = pe.pseudo_Obs(0.8, .1, 'two|r2') + testl = [o3, o5] + + mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]]) + + ol = [do, testl, mat] + fname = 'test_rw' + + jsonio.dump_to_json(ol, fname, indent=1) + + rl = jsonio.load_json(fname) + + os.remove(fname + '.json.gz') + + for i in range(len(rl)): + if isinstance(ol[i], pe.Obs): + o = ol[i] - rl[i] + assert(o.is_zero()) + or1 = np.ravel(ol[i]) + or2 = np.ravel(rl[i]) + for j in range(len(or1)): + o = or1[i] - or2[i] + assert(o.is_zero()) From d7c2f125fed7ebf1a81d88b5fb3dd27c80105cb0 Mon Sep 17 00:00:00 2001 From: Simon Kuberski Date: Tue, 30 Nov 2021 14:28:07 +0100 Subject: [PATCH 02/11] Created routine to get jsonstring itself, allowed for the I/O of uncompressed files, fixed bug for 1d-Arrays --- pyerrors/input/json.py | 82 
+++++++++++++++++++++++++++++++++---------
 tests/io_test.py       |   5 +--
 2 files changed, 68 insertions(+), 19 deletions(-)

diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py
index f4229a20..83edfb63 100644
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@@ -7,10 +7,12 @@ import datetime
 from .. import version as pyerrorsversion
 import platform
 import numpy as np
+import warnings
 
 
-def dump_to_json(ol, fname, description='', indent=4):
-    """Export a list of Obs or structures containing Obs to a .json.gz file
+def create_json_string(ol, fname, description='', indent=1):
+    """Generate the string for the export of a list of Obs or structures containing Obs
+    to a .json(.gz) file
 
     Parameters
     -----------------
@@ -147,25 +149,59 @@
             d['obsdata'].append(write_List_to_dict(io))
         elif isinstance(io, np.ndarray):
             d['obsdata'].append(write_Array_to_dict(io))
-    if not fname.endswith('.json') and not fname.endswith('.gz'):
-        fname += '.json'
-    if not fname.endswith('.gz'):
-        fname += '.gz'
+
     jsonstring = json.dumps(d, indent=indent, cls=my_encoder)
     # workaround for un-indentation of delta lists
-    jsonstring = jsonstring.replace('"[', '[').replace(']"', ']')
-    fp = gzip.open(fname, 'wb')
-    fp.write(jsonstring.encode('utf-8'))
+    jsonstring = jsonstring.replace(' "[', ' [').replace(']",', '],').replace(']"\n', ']\n')
+
+    return jsonstring
+
+
+def dump_to_json(ol, fname, description='', indent=1, gz=True):
+    """Export a list of Obs or structures containing Obs to a .json(.gz) file
+
+    Parameters
+    -----------------
+    ol : list
+        List of objects that will be exported. At the moment, these objects can be
+        either of: Obs, list, np.ndarray
+        All Obs inside a structure have to be defined on the same set of configurations.
+    fname : str
+        Filename of the output file
+    description : str
+        Optional string that describes the contents of the json file
+    indent : int
+        Specify the indentation level of the json file. None or 0 is permissible and
+        saves disk space.
+    gz : bool
+        If True, the output is a gzipped json. If False, the output is a json file.
+    """
+
+    jsonstring = create_json_string(ol, fname, description, indent)
+
+    if not fname.endswith('.json') and not fname.endswith('.gz'):
+        fname += '.json'
+
+    if gz:
+        if not fname.endswith('.gz'):
+            fname += '.gz'
+
+        fp = gzip.open(fname, 'wb')
+        fp.write(jsonstring.encode('utf-8'))
+    else:
+        fp = open(fname, 'w')
+        fp.write(jsonstring)
     fp.close()
 
-    # this would be nicer, since it does not need a string
+    # this would be nicer, since it does not need a string but uses serialization (less memory!)
     # with gzip.open(fname, 'wt', encoding='UTF-8') as zipfile:
     #     json.dump(d, zipfile, indent=indent)
 
 
-def load_json(fname, verbose=True):
-    """Import a list of Obs or structures containing Obs to a .json.gz file.
+def load_json(fname, verbose=True, gz=True):
+    """Import a list of Obs or structures containing Obs from a .json.gz file.
     The following structures are supported: Obs, list, np.ndarray
+    If the list contains only one element, it is unpacked from the list.
 
     Parameters
     -----------------
@@ -173,6 +209,8 @@
         Filename of the input file
     verbose : bool
        Print additional information that was written to the file.
+    gz : bool
+        If True, assumes that data is gzipped. If False, assumes JSON file.
""" def _gen_obsd_from_datad(d): @@ -220,7 +258,7 @@ def load_json(fname, verbose=True): def get_Array_from_dict(o): layouts = o.get('layout', '1').strip() - layout = [int(ls.strip()) for ls in layouts.split(',')] + layout = [int(ls.strip()) for ls in layouts.split(',') if len(ls) > 0] values = o['value'] od = _gen_obsd_from_datad(o['data']) @@ -234,10 +272,17 @@ def load_json(fname, verbose=True): if not fname.endswith('.json') and not fname.endswith('.gz'): fname += '.json' - if not fname.endswith('.gz'): - fname += '.gz' - with gzip.open(fname, 'r') as fin: - d = json.loads(fin.read().decode('utf-8')) + if gz: + if not fname.endswith('.gz'): + fname += '.gz' + with gzip.open(fname, 'r') as fin: + d = json.loads(fin.read().decode('utf-8')) + else: + if fname.endswith('.gz'): + warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) + with open(fname, 'r') as fin: + d = json.loads(fin.read()) + prog = d.get('program', '') version = d.get('version', '') who = d.get('who', '') @@ -262,4 +307,7 @@ def load_json(fname, verbose=True): ol.append(get_List_from_dict(io)) elif io['type'] == 'Array': ol.append(get_Array_from_dict(io)) + + if len(obsdata) == 1: + ol = ol[0] return ol diff --git a/tests/io_test.py b/tests/io_test.py index c2dcdcbb..95dc00cd 100644 --- a/tests/io_test.py +++ b/tests/io_test.py @@ -14,9 +14,10 @@ def test_jsonio(): o5 = pe.pseudo_Obs(0.8, .1, 'two|r2') testl = [o3, o5] + arr = np.array([o3, o5]) mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]]) - ol = [do, testl, mat] + ol = [do, testl, mat, arr, np.array([o])] fname = 'test_rw' jsonio.dump_to_json(ol, fname, indent=1) @@ -32,5 +33,5 @@ def test_jsonio(): or1 = np.ravel(ol[i]) or2 = np.ravel(rl[i]) for j in range(len(or1)): - o = or1[i] - or2[i] + o = or1[j] - or2[j] assert(o.is_zero()) From 5937a519d7cc1a7c09f46b5683c05e028af5d4a6 Mon Sep 17 00:00:00 2001 From: Simon Kuberski Date: Tue, 30 Nov 2021 14:50:16 +0100 Subject: [PATCH 03/11] Added dictionary output of json input routine: It is possible to import and export any structure that is JSON serializable as description (or Obs.tag) --- pyerrors/input/json.py | 55 ++++++++++++++++++++++++++++-------------- tests/io_test.py | 14 ++++++++++- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py index 83edfb63..bc4deaee 100644 --- a/pyerrors/input/json.py +++ b/pyerrors/input/json.py @@ -88,7 +88,8 @@ def create_json_string(ol, fname, description='', indent=1): d = {} d['type'] = 'Obs' d['layout'] = '1' - d['tag'] = o.tag + if o.tag: + d['tag'] = o.tag if o.reweighted: d['reweighted'] = o.reweighted d['value'] = [o.value] @@ -100,12 +101,13 @@ def create_json_string(ol, fname, description='', indent=1): d = {} d['type'] = 'List' d['layout'] = '%d' % len(ol) - if len(set([o.tag for o in ol])) > 1: - d['tag'] = '' - for o in ol: - d['tag'] += '%s\n' % (o.tag) - else: + if ol[0].tag: d['tag'] = ol[0].tag + if isinstance(ol[0].tag, str): + if len(set([o.tag for o in ol])) > 1: + d['tag'] = '' + for o in ol: + d['tag'] += '%s\n' % (o.tag) if ol[0].reweighted: d['reweighted'] = ol[0].reweighted d['value'] = [o.value for o in ol] @@ -119,12 +121,13 @@ def create_json_string(ol, fname, description='', indent=1): d = {} d['type'] = 'Array' d['layout'] = str(oa.shape).lstrip('(').rstrip(')') - if len(set([o.tag for o in ol])) > 1: - d['tag'] = '' - for o in ol: - d['tag'] += '%s\n' % (o.tag) - else: + if 
ol[0].tag: d['tag'] = ol[0].tag + if isinstance(ol[0].tag, str): + if len(set([o.tag for o in ol])) > 1: + d['tag'] = '' + for o in ol: + d['tag'] += '%s\n' % (o.tag) if ol[0].reweighted: d['reweighted'] = ol[0].reweighted d['value'] = [o.value for o in ol] @@ -198,7 +201,7 @@ def dump_to_json(ol, fname, description='', indent=1, gz=True): # json.dump(d, zipfile, indent=indent) -def load_json(fname, verbose=True, gz=True): +def load_json(fname, verbose=True, gz=True, full_output=False): """Import a list of Obs or structures containing Obs from a .json.gz file. The following structures are supported: Obs, list, np.ndarray If the list contains only one element, it is unpacked from the list. @@ -211,6 +214,9 @@ def load_json(fname, verbose=True, gz=True): Print additional information that was written to the file. gz : bool If True, assumes that data is gzipped. If False, assumes JSON file. + full_output : bool + If True, a dict containing auxiliary information and the data is returned. + If False, only the data is returned. """ def _gen_obsd_from_datad(d): @@ -239,7 +245,7 @@ def load_json(fname, verbose=True, gz=True): ret = Obs([[ddi[0] + values[0] for ddi in di] for di in od['deltas']], od['names'], idl=od['idl']) ret.reweighted = o.get('reweighted', False) ret.is_merged = od['is_merged'] - ret.tag = o.get('tag', '') + ret.tag = o.get('tag', None) return ret def get_List_from_dict(o): @@ -253,7 +259,7 @@ def load_json(fname, verbose=True, gz=True): ret.append(Obs([list(di[:, i] + values[i]) for di in od['deltas']], od['names'], idl=od['idl'])) ret[-1].reweighted = o.get('reweighted', False) ret[-1].is_merged = od['is_merged'] - ret[-1].tag = o.get('tag', '') + ret[-1].tag = o.get('tag', None) return ret def get_Array_from_dict(o): @@ -267,7 +273,7 @@ def load_json(fname, verbose=True, gz=True): ret.append(Obs([di[:, i] + values[i] for di in od['deltas']], od['names'], idl=od['idl'])) ret[-1].reweighted = o.get('reweighted', False) ret[-1].is_merged = od['is_merged'] - ret[-1].tag = o.get('tag', '') + ret[-1].tag = o.get('tag', None) return np.reshape(ret, layout) if not fname.endswith('.json') and not fname.endswith('.gz'): @@ -308,6 +314,19 @@ def load_json(fname, verbose=True, gz=True): elif io['type'] == 'Array': ol.append(get_Array_from_dict(io)) - if len(obsdata) == 1: - ol = ol[0] - return ol + if full_output: + retd = {} + retd['program'] = prog + retd['version'] = version + retd['who'] = who + retd['date'] = date + retd['host'] = host + retd['description'] = description + retd['obsdata'] = ol + + return retd + else: + if len(obsdata) == 1: + ol = ol[0] + + return ol diff --git a/tests/io_test.py b/tests/io_test.py index 95dc00cd..de309fd0 100644 --- a/tests/io_test.py +++ b/tests/io_test.py @@ -9,6 +9,8 @@ def test_jsonio(): o2 = pe.pseudo_Obs(0.5, .1, 'two|r1') o3 = pe.pseudo_Obs(0.5, .1, 'two|r2') o4 = pe.merge_obs([o2, o3]) + otag = 'This has been merged!' 
+    o4.tag = otag
     do = o - .2 * o4
 
     o5 = pe.pseudo_Obs(0.8, .1, 'two|r2')
@@ -17,7 +19,7 @@ def test_jsonio():
     arr = np.array([o3, o5])
     mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]])
 
-    ol = [do, testl, mat, arr, np.array([o])]
+    ol = [o4, do, testl, mat, arr, np.array([o])]
     fname = 'test_rw'
 
     jsonio.dump_to_json(ol, fname, indent=1)
@@ -30,8 +32,18 @@ def test_jsonio():
         if isinstance(ol[i], pe.Obs):
             o = ol[i] - rl[i]
             assert(o.is_zero())
+            assert(ol[i].tag == rl[i].tag)
         or1 = np.ravel(ol[i])
         or2 = np.ravel(rl[i])
         for j in range(len(or1)):
             o = or1[j] - or2[j]
             assert(o.is_zero())
+
+    description = {'I': {'Am': {'a': 'nested dictionary!'}}}
+    jsonio.dump_to_json(ol, fname, indent=1, gz=False, description=description)
+
+    rl = jsonio.load_json(fname, gz=False, full_output=True)
+
+    assert(description == rl['description'])
+
+    os.remove(fname + '.json')

From 833c22fe36f6a9fad7c7d8b30799810d19d612ce Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Tue, 30 Nov 2021 14:52:25 +0000
Subject: [PATCH 04/11] docs: Changelog updated

---
 CHANGELOG.md | 27 ++++++++++++++++++++-------
 README.md    |  2 +-
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 92681a4a..73d9eab9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,20 +4,33 @@ All notable changes to this project will be documented in this file.
 
 ## [2.0.0] - 2021-??-??
 
 ### Added
-- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation
-- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int`
-- `Obs` objects now have methods `is_zero` and `is_zero_within_error`
+- `CObs` class added which can handle complex valued Markov chain Monte Carlo data and the corresponding error propagation.
+- Matrix to matrix operations like the matrix inverse now also work for complex matrices and matrices containing entries that are not `Obs` but `float` or `int`.
+- The possibility to work with Monte Carlo histories which are evenly or unevenly spaced was added.
+- The Corr class now has additional methods like `reverse`, `T_symmetry`, `correlate` and `reweight`.
+- `linalg` module now has explicit functions `inv` and `cholesky`.
+- `Obs` objects now have methods `is_zero` and `is_zero_within_error` as well as overloaded comparison operations.
+- Functions to convert Obs data to or from jackknife were added.
+- Alternative matrix multiplication routine `jack_matmul` was added to `linalg` module which makes use of the jackknife approximation and is much faster for large matrices.
+- Additional input routines for npr data added to `input.hadrons`.
+- Version number added.
 
 ### Changed
-- Additional attributes can no longer be added to existing `Obs`. This makes it no longer possible to import `Obs` created with previous versions of pyerrors
-- The default value for `Corr.prange` is now `None`
-- The `input` module was restructured to contain one submodule per data source
+- The internal bookkeeping system for ensembles/replica was changed. The separator for replica is now `|`.
+- The fit functions were renamed to `least_squares` and `total_least_squares`.
+- The fit functions can now deal with provided covariance matrices.
+- The convention for the fit range in the Corr class has been changed.
+- Obs.print was renamed to Obs.details and the output was improved.
+- The default value for `Corr.prange` is now `None`. +- The `input` module was restructured to contain one submodule per data source. +- Performance of Obs.__init__ improved. ### Deprecated - The function `plot_corrs` was deprecated as all its functionality is now contained within `Corr.show` - The kwarg `bias_correction` in `derived_observable` was removed - Obs no longer have an attribute `e_Q` - Removed `fits.fit_exp` +- Removed jackknife module ## [1.1.0] - 2021-10-11 ### Added @@ -77,7 +90,7 @@ All notable changes to this project will be documented in this file. ## [0.7.0] - 2020-03-10 ### Added -- New fit funtions for fitting with and without x-errors added which use automatic differentiation and should be more reliable than the old ones. +- New fit functions for fitting with and without x-errors added which use automatic differentiation and should be more reliable than the old ones. - Fitting with Bayesian priors added. - New functions for visualization of fits which can be activated via the kwargs resplot and qqplot. - chisquare/expected_chisquared which takes into account correlations in the data and non-linearities in the fit function can now be activated with the kwarg expected_chisquare. diff --git a/README.md b/README.md index 4e4a0a9d..0aa5e77e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ `pyerrors` is a python package for error computation and propagation of Markov chain Monte Carlo data. - **Documentation:** https://fjosw.github.io/pyerrors/pyerrors.html -- **Examples**: https://github.com/fjosw/pyerrors/tree/develop/examples +- **Examples**: https://github.com/fjosw/pyerrors/tree/develop/examples (Do not work properly at the moment) - **Contributing:** https://github.com/fjosw/pyerrors/blob/develop/CONTRIBUTING.md - **Bug reports:** https://github.com/fjosw/pyerrors/issues From 6f99ec16fb532badc282ff3de0402648bf0c040c Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 30 Nov 2021 14:57:36 +0000 Subject: [PATCH 05/11] docs: reference to other error analysis suites moved from README to documentation --- README.md | 6 ------ pyerrors/__init__.py | 5 +++++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 0aa5e77e..773970cb 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,3 @@ to install the most recent release run ```bash pip install git+https://github.com/fjosw/pyerrors.git@master ``` - -## Other implementations -There exist similar publicly available implementations of gamma method error analysis suites in -- [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors) -- [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl) -- [Python](https://github.com/mbruno46/pyobs) diff --git a/pyerrors/__init__.py b/pyerrors/__init__.py index f2194cdb..5ff2c925 100644 --- a/pyerrors/__init__.py +++ b/pyerrors/__init__.py @@ -8,6 +8,11 @@ It is based on the **gamma method** [arXiv:hep-lat/0306017](https://arxiv.org/ab - **non-linear fits with x- and y-errors** and exact linear error propagation based on automatic differentiation as introduced in [arXiv:1809.01289](https://arxiv.org/abs/1809.01289) - **real and complex matrix operations** and their error propagation based on automatic differentiation (Cholesky decomposition, calculation of eigenvalues and eigenvectors, singular value decomposition...) 
+There exist similar publicly available implementations of gamma method error analysis suites in +- [Fortran](https://gitlab.ift.uam-csic.es/alberto/aderrors) +- [Julia](https://gitlab.ift.uam-csic.es/alberto/aderrors.jl) +- [Python](https://github.com/mbruno46/pyobs) + ## Basic example ```python From d740f8d3c4893ae96434b6daba783b597d50499b Mon Sep 17 00:00:00 2001 From: Simon Kuberski Date: Tue, 30 Nov 2021 16:26:46 +0100 Subject: [PATCH 06/11] utf-8 for plain json files and a bugfix for reading arrays --- pyerrors/input/json.py | 16 ++++++---------- tests/io_test.py | 10 +++++++++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py index bc4deaee..d81a5f94 100644 --- a/pyerrors/input/json.py +++ b/pyerrors/input/json.py @@ -120,7 +120,7 @@ def create_json_string(ol, fname, description='', indent=1): _assert_equal_properties(ol) d = {} d['type'] = 'Array' - d['layout'] = str(oa.shape).lstrip('(').rstrip(')') + d['layout'] = str(oa.shape).lstrip('(').rstrip(')').rstrip(',') if ol[0].tag: d['tag'] = ol[0].tag if isinstance(ol[0].tag, str): @@ -153,7 +153,7 @@ def create_json_string(ol, fname, description='', indent=1): elif isinstance(io, np.ndarray): d['obsdata'].append(write_Array_to_dict(io)) - jsonstring = json.dumps(d, indent=indent, cls=my_encoder) + jsonstring = json.dumps(d, indent=indent, cls=my_encoder, ensure_ascii=False) # workaround for un-indentation of delta lists jsonstring = jsonstring.replace(' "[', ' [').replace(']",', '],').replace(']"\n', ']\n') @@ -192,14 +192,10 @@ def dump_to_json(ol, fname, description='', indent=1, gz=True): fp = gzip.open(fname, 'wb') fp.write(jsonstring.encode('utf-8')) else: - fp = open(fname, 'w') + fp = open(fname, 'w', encoding='utf-8') fp.write(jsonstring) fp.close() - # this would be nicer, since it does not need a string but uses serialization (less memory!) - # with gzip.open(fname, 'wt', encoding='UTF-8') as zipfile: - # json.dump(d, zipfile, indent=indent) - def load_json(fname, verbose=True, gz=True, full_output=False): """Import a list of Obs or structures containing Obs from a .json.gz file. @@ -229,7 +225,7 @@ def load_json(fname, verbose=True, gz=True, full_output=False): for rep in ens['replica']: retd['names'].append(rep['name']) retd['idl'].append([di[0] for di in rep['deltas']]) - retd['deltas'].append([di[1:] for di in rep['deltas']]) + retd['deltas'].append(np.array([di[1:] for di in rep['deltas']])) retd['is_merged'][rep['name']] = rep.get('is_merged', False) retd['deltas'] = np.array(retd['deltas']) return retd @@ -286,7 +282,7 @@ def load_json(fname, verbose=True, gz=True, full_output=False): else: if fname.endswith('.gz'): warnings.warn("Trying to read from %s without unzipping!" 
% fname, UserWarning)
-        with open(fname, 'r') as fin:
+        with open(fname, 'r', encoding='utf-8') as fin:
             d = json.loads(fin.read())
 
     prog = d.get('program', '')
@@ -303,7 +299,7 @@
     description = d.get('description', '')
     if description and verbose:
         print()
-        print(description)
+        print('Description: ', description)
     obsdata = d['obsdata']
     ol = []
     for io in obsdata:
diff --git a/tests/io_test.py b/tests/io_test.py
index de309fd0..77a0f474 100644
--- a/tests/io_test.py
+++ b/tests/io_test.py
@@ -19,7 +19,15 @@ def test_jsonio():
     arr = np.array([o3, o5])
     mat = np.array([[pe.pseudo_Obs(1.0, .1, 'mat'), pe.pseudo_Obs(0.3, .1, 'mat')], [pe.pseudo_Obs(0.2, .1, 'mat'), pe.pseudo_Obs(2.0, .4, 'mat')]])
 
-    ol = [o4, do, testl, mat, arr, np.array([o])]
+    tt1 = pe.Obs([np.random.rand(100)], ['t|r1'], idl=[range(2, 202, 2)])
+    tt2 = pe.Obs([np.random.rand(100)], ['t|r2'], idl=[range(2, 202, 2)])
+    tt3 = pe.Obs([np.random.rand(102)], ['qe'])
+
+    tt = tt1 + tt2 + tt3
+
+    tt.tag = 'Test Obs'
+
+    ol = [o4, do, testl, mat, arr, np.array([o]), np.array([tt, tt]), [tt, tt]]
     fname = 'test_rw'
 
     jsonio.dump_to_json(ol, fname, indent=1)

From 71e9d6c29c09b70946a9240aaef6d7cf0539d88d Mon Sep 17 00:00:00 2001
From: Simon Kuberski
Date: Tue, 30 Nov 2021 17:46:33 +0100
Subject: [PATCH 07/11] Removed unnecessary cast

---
 pyerrors/input/json.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyerrors/input/json.py b/pyerrors/input/json.py
index d81a5f94..a7c863d4 100644
--- a/pyerrors/input/json.py
+++ b/pyerrors/input/json.py
@@ -227,7 +227,6 @@
                 retd['idl'].append([di[0] for di in rep['deltas']])
                 retd['deltas'].append(np.array([di[1:] for di in rep['deltas']]))
                 retd['is_merged'][rep['name']] = rep.get('is_merged', False)
-        retd['deltas'] = np.array(retd['deltas'])
         return retd

From 58ccd11e4827f3dcfffcbd523c28976d5c198dde Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Tue, 30 Nov 2021 16:50:35 +0000
Subject: [PATCH 08/11] feat: json submodule now available via pyerrors.input.json

---
 pyerrors/input/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyerrors/input/__init__.py b/pyerrors/input/__init__.py
index 24766e77..2797841c 100644
--- a/pyerrors/input/__init__.py
+++ b/pyerrors/input/__init__.py
@@ -1,5 +1,6 @@
 from . import bdio
 from . import hadrons
-from . import sfcf
-from . import openQCD
+from . import json
 from . import misc
+from . import openQCD
+from . import sfcf

From aec90803eff9c3f2d5909f236a55c8b9cc117f0e Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Tue, 30 Nov 2021 17:05:00 +0000
Subject: [PATCH 09/11] feat: function which extracts npr fourquark vertices now constructs Lorentz scalars.
--- pyerrors/input/hadrons.py | 78 ++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 14 deletions(-) diff --git a/pyerrors/input/hadrons.py b/pyerrors/input/hadrons.py index 00b93453..7f216f07 100644 --- a/pyerrors/input/hadrons.py +++ b/pyerrors/input/hadrons.py @@ -224,7 +224,7 @@ def read_Bilinear_hd5(path, filestem, ens_id, idl=None): return result_dict -def read_Fourquark_hd5(path, filestem, ens_id, idl=None): +def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]): """Read hadrons FourquarkFullyConnected hdf5 file and output an array of CObs Parameters @@ -237,6 +237,8 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None): name of the ensemble, required for internal bookkeeping idl : range If specified only configurations in the given range are read in. + vertices : list + Vertex functions to be extracted. """ files, idx = _get_files(path, filestem, idl) @@ -244,6 +246,11 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None): mom_in = None mom_out = None + vertex_names = [] + for vertex in vertices: + vertex_names += _get_lorentz_names(vertex) + print(vertex_names) + corr_data = {} tree = 'FourQuarkFullyConnected/FourQuarkFullyConnected_' @@ -251,25 +258,35 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None): for hd5_file in files: file = h5py.File(path + '/' + hd5_file, "r") - for i in range(1): - name = file[tree + str(i) + '/info'].attrs['gammaA'][0].decode('UTF-8') + '_' + file[tree + str(i) + '/info'].attrs['gammaB'][0].decode('UTF-8') - if name not in corr_data: - corr_data[name] = [] - raw_data = file[tree + str(i) + '/corr'][0][0].view('complex') - corr_data[name].append(raw_data) - if mom_in is None: - mom_in = np.array(str(file[tree + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int) - if mom_out is None: - mom_out = np.array(str(file[tree + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int) + for i in range(32): + name = (file[tree + str(i) + '/info'].attrs['gammaA'][0].decode('UTF-8'), file[tree + str(i) + '/info'].attrs['gammaB'][0].decode('UTF-8')) + if name in vertex_names: + if name not in corr_data: + corr_data[name] = [] + raw_data = file[tree + str(i) + '/corr'][0][0].view('complex') + corr_data[name].append(raw_data) + if mom_in is None: + mom_in = np.array(str(file[tree + str(i) + '/info'].attrs['pIn'])[3:-2].strip().split(' '), dtype=int) + if mom_out is None: + mom_out = np.array(str(file[tree + str(i) + '/info'].attrs['pOut'])[3:-2].strip().split(' '), dtype=int) file.close() + intermediate_dict = {} + + for vertex in vertices: + lorentz_names = _get_lorentz_names(vertex) + for v_name in lorentz_names: + if vertex not in intermediate_dict: + intermediate_dict[vertex] = np.array(corr_data[v_name]) + else: + intermediate_dict[vertex] += np.array(corr_data[v_name]) + result_dict = {} - for key, data in corr_data.items(): - local_data = np.array(data) + for key, data in intermediate_dict.items(): - rolled_array = np.moveaxis(local_data, 0, 8) + rolled_array = np.moveaxis(data, 0, 8) matrix = np.empty((rolled_array.shape[:-1]), dtype=object) for index in np.ndindex(rolled_array.shape[:-1]): @@ -281,3 +298,36 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None): # result_dict[key] = Npr_matrix(matrix.swapaxes(1, 2).reshape((12, 12), order='F'), mom_in=mom_in, mom_out=mom_out) return result_dict + + +def _get_lorentz_names(name): + assert len(name) == 2 + + res = [] + + if not set(name) <= set(['S', 'P', 'V', 'A', 'T']): + raise Exception("Name can only 
contain 'S', 'P', 'V', 'A' or 'T'") + + if 'S' in name or 'P' in name: + if not set(name) <= set(['S', 'P']): + raise Exception("'" + name + "' is not a Lorentz scalar") + + g_names = {'S': 'Identity', + 'P': 'Gamma5'} + + res.append((g_names[name[0]], g_names[name[1]])) + + elif 'T' in name: + if not set(name) <= set(['T']): + raise Exception("'" + name + "' is not a Lorentz scalar") + raise Exception("Tensor operators not yet implemented.") + else: + if not set(name) <= set(['V', 'A']): + raise Exception("'" + name + "' is not a Lorentz scalar") + lorentz_index = ['X', 'Y', 'Z', 'T'] + + for ind in lorentz_index: + res.append(('Gamma' + ind + (name[0] == 'A') * 'Gamma5', + 'Gamma' + ind + (name[1] == 'A') * 'Gamma5')) + + return res From bb91c37ac402a0cde572527da6c1ead87c769877 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Tue, 30 Nov 2021 17:12:14 +0000 Subject: [PATCH 10/11] fix: unnecessary print statement and comment removed in fourquark input routine --- pyerrors/input/hadrons.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyerrors/input/hadrons.py b/pyerrors/input/hadrons.py index 7f216f07..39afa16c 100644 --- a/pyerrors/input/hadrons.py +++ b/pyerrors/input/hadrons.py @@ -249,7 +249,6 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]): vertex_names = [] for vertex in vertices: vertex_names += _get_lorentz_names(vertex) - print(vertex_names) corr_data = {} @@ -295,7 +294,6 @@ def read_Fourquark_hd5(path, filestem, ens_id, idl=None, vertices=["VA", "AV"]): matrix[index] = CObs(real, imag) result_dict[key] = Npr_matrix(matrix, mom_in=mom_in, mom_out=mom_out) - # result_dict[key] = Npr_matrix(matrix.swapaxes(1, 2).reshape((12, 12), order='F'), mom_in=mom_in, mom_out=mom_out) return result_dict From 6bc8102f87f9e18d3cd65d31b1c34427a5b1d2b1 Mon Sep 17 00:00:00 2001 From: Fabian Joswig Date: Wed, 1 Dec 2021 09:22:16 +0000 Subject: [PATCH 11/11] refactor: jackknife helper functions in linalg module refactored --- pyerrors/linalg.py | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/pyerrors/linalg.py b/pyerrors/linalg.py index dbbde944..e6ee9cef 100644 --- a/pyerrors/linalg.py +++ b/pyerrors/linalg.py @@ -174,6 +174,35 @@ def matmul(*operands): return derived_array(multi_dot, operands) +def _exp_to_jack(matrix): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = entry.export_jackknife() + return base_matrix + + +def _imp_from_jack(matrix, name, idl): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = import_jackknife(entry, name, [idl]) + return base_matrix + + +def _exp_to_jack_c(matrix): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = entry.real.export_jackknife() + 1j * entry.imag.export_jackknife() + return base_matrix + + +def _imp_from_jack_c(matrix, name, idl): + base_matrix = np.empty_like(matrix) + for index, entry in np.ndenumerate(matrix): + base_matrix[index] = CObs(import_jackknife(entry.real, name, [idl]), + import_jackknife(entry.imag, name, [idl])) + return base_matrix + + def jack_matmul(*operands): """Matrix multiply both operands making use of the jackknife approximation. 
@@ -190,49 +219,24 @@ def jack_matmul(*operands): name = operands[0].flat[0].real.names[0] idl = operands[0].flat[0].real.idl[name] - def _exp_to_jack(matrix): - base_matrix = np.empty_like(matrix) - for index, entry in np.ndenumerate(matrix): - base_matrix[index] = entry.real.export_jackknife() + 1j * entry.imag.export_jackknife() - return base_matrix - - def _imp_from_jack(matrix): - base_matrix = np.empty_like(matrix) - for index, entry in np.ndenumerate(matrix): - base_matrix[index] = CObs(import_jackknife(entry.real, name, [idl]), - import_jackknife(entry.imag, name, [idl])) - return base_matrix - - r = _exp_to_jack(operands[0]) + r = _exp_to_jack_c(operands[0]) for op in operands[1:]: if isinstance(op.flat[0], CObs): - r = r @ _exp_to_jack(op) + r = r @ _exp_to_jack_c(op) else: r = r @ op - return _imp_from_jack(r) + return _imp_from_jack_c(r, name, idl) else: name = operands[0].flat[0].names[0] idl = operands[0].flat[0].idl[name] - def _exp_to_jack(matrix): - base_matrix = np.empty_like(matrix) - for index, entry in np.ndenumerate(matrix): - base_matrix[index] = entry.export_jackknife() - return base_matrix - - def _imp_from_jack(matrix): - base_matrix = np.empty_like(matrix) - for index, entry in np.ndenumerate(matrix): - base_matrix[index] = import_jackknife(entry, name, [idl]) - return base_matrix - r = _exp_to_jack(operands[0]) for op in operands[1:]: if isinstance(op.flat[0], Obs): r = r @ _exp_to_jack(op) else: r = r @ op - return _imp_from_jack(r) + return _imp_from_jack(r, name, idl) def inv(x):
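
Taken together, the patches above define a small user-facing API for JSON I/O. The following is a minimal usage sketch of that API, assuming a pyerrors installation that already contains this patch series; the observable values, the ensemble name `toy_ensemble` and the file name `toy_file` are invented for illustration.

```python
import os
import numpy as np
import pyerrors.obs as pe
import pyerrors.input.json as jsonio

# Build a toy observable and a small array of observables on the same ensemble
obs = pe.pseudo_Obs(1.0, 0.1, 'toy_ensemble')
arr = np.array([obs, 2 * obs])

# Export Obs, list and np.ndarray structures to a gzipped json file;
# the optional description is stored in the file header
jsonio.dump_to_json([obs, arr], 'toy_file', description='toy data', gz=True)

# Re-import the data; full_output=True also returns the metadata written
# by dump_to_json (program, version, who, date, host, description)
content = jsonio.load_json('toy_file', verbose=False, full_output=True)
assert content['description'] == 'toy data'
obs_list = content['obsdata']  # [Obs, np.ndarray of Obs]

os.remove('toy_file.json.gz')
```

Note that without `full_output`, `load_json` returns only the observables, and unpacks them from the enclosing list when the file contains a single object; here it would return a list of two entries.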