pyerrors.input.dobs

  1from collections import defaultdict
  2import gzip
  3import lxml.etree as et
  4import getpass
  5import socket
  6import datetime
  7import json
  8import warnings
  9import numpy as np
 10from ..obs import Obs
 11from ..obs import _merge_idx
 12from ..covobs import Covobs
 13from .. import version as pyerrorsversion
 14
 15
 16# Based on https://stackoverflow.com/a/10076823
 17def _etree_to_dict(t):
 18    """ Convert the content of an XML file to a python dict"""
 19    d = {t.tag: {} if t.attrib else None}
 20    children = list(t)
 21    if children:
 22        dd = defaultdict(list)
 23        for dc in map(_etree_to_dict, children):
 24            for k, v in dc.items():
 25                dd[k].append(v)
 26        d = {t.tag: {k: v[0] if len(v) == 1 else v
 27                     for k, v in dd.items()}}
 28    if t.attrib:
 29        d[t.tag].update(('@' + k, v)
 30                        for k, v in t.attrib.items())
 31    if t.text:
 32        text = t.text.strip()
 33        if children or t.attrib:
 34            if text:
 35                d[t.tag]['#data'] = [text]
 36        else:
 37            d[t.tag] = text
 38    return d
 39
 40
 41def _dict_to_xmlstring(d):
 42    if isinstance(d, dict):
 43        iters = ''
 44        for k in d:
 45            if k.startswith('#'):
 46                for la in d[k]:
 47                    iters += la
 48                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 49                return iters
 50            if isinstance(d[k], dict):
 51                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 52            elif isinstance(d[k], str):
 53                if len(d[k]) > 100:
 54                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 55                else:
 56                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 57            elif isinstance(d[k], list):
 58                for i in range(len(d[k])):
 59                    iters += _dict_to_xmlstring(d[k][i])
 60            elif not d[k]:
 61                return '\n'
 62            else:
 63                raise Exception('Type', type(d[k]), 'not supported in export!')
 64    else:
 65        raise Exception('Type', type(d), 'not supported in export!')
 66    return iters
 67
 68
 69def _dict_to_xmlstring_spaces(d, space='  '):
 70    s = _dict_to_xmlstring(d)
 71    o = ''
 72    c = 0
 73    cm = False
 74    for li in s.split('\n'):
 75        if li.startswith('<%s' % ('/')):
 76            c -= 1
 77            cm = True
 78        for i in range(c):
 79            o += space
 80        o += li + '\n'
 81        if li.startswith('<') and not cm:
 82            if not '<%s' % ('/') in li:
 83                c += 1
 84        cm = False
 85    return o
 86
 87
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109    """
110
111    od = {}
112    ename = obsl[0].e_names[0]
113    names = list(obsl[0].deltas.keys())
114    nr = len(names)
115    onames = [name.replace('|', '') for name in names]
116    for o in obsl:
117        if len(o.e_names) != 1:
118            raise Exception('You try to export dobs to obs!')
119        if o.e_names[0] != ename:
120            raise Exception('You try to export dobs to obs!')
121        if len(o.deltas.keys()) != nr:
122            raise Exception('Incompatible obses in list')
123    od['observables'] = {}
124    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
125    od['observables']['origin'] = {
126        'who': getpass.getuser(),
127        'date': str(datetime.datetime.now())[:-7],
128        'host': socket.gethostname(),
129        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
130    od['observables']['pobs'] = {}
131    pd = od['observables']['pobs']
132    pd['spec'] = spec
133    pd['origin'] = origin
134    pd['name'] = name
135    if enstag:
136        if not isinstance(enstag, str):
137            raise Exception('enstag has to be a string!')
138        pd['enstag'] = enstag
139    else:
140        pd['enstag'] = ename
141    pd['nr'] = '%d' % (nr)
142    pd['array'] = []
143    osymbol = 'cfg'
144    if not isinstance(symbol, list):
145        raise Exception('Symbol has to be a list!')
146    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
147        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
148    for s in symbol:
149        osymbol += ' %s' % s
150    for r in range(nr):
151        ad = {}
152        ad['id'] = onames[r]
153        Nconf = len(obsl[0].deltas[names[r]])
154        layout = '%d i f%d' % (Nconf, len(obsl))
155        ad['layout'] = layout
156        ad['symbol'] = osymbol
157        data = ''
158        for c in range(Nconf):
159            data += '%d ' % obsl[0].idl[names[r]][c]
160            for o in obsl:
161                num = o.deltas[names[r]][c] + o.r_values[names[r]]
162                if num == 0:
163                    data += '0 '
164                else:
165                    data += '%1.16e ' % (num)
166            data += '\n'
167        ad['#data'] = data
168        pd['array'].append(ad)
169
170    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
171    return rs
172
173
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not already end in '.gz'.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
215
216
217def _import_data(string):
218    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")
219
220
221def _check(condition):
222    if not condition:
223        raise Exception("XML file format not supported")
224
225
226class _NoTagInDataError(Exception):
227    """Raised when tag is not in data"""
228    def __init__(self, tag):
229        self.tag = tag
230        super().__init__('Tag %s not in data!' % (self.tag))
231
232
233def _find_tag(dat, tag):
234    for i in range(len(dat)):
235        if dat[i].tag == tag:
236            return i
237    raise _NoTagInDataError(tag)
238
239
def _import_array(arr):
    """Parse one ``<array>`` element.

    The numbers are read from the tail text of the ``layout`` child or, if a
    ``symbol`` child follows it, from the tail text of that child. The layout
    string decides how they are interpreted:

    * ``'<nc> i f<na>'``: rows of one index followed by ``na`` values.
      Returns ``[name, indices, mask, columns]`` where ``columns`` is a list
      of ``na`` numpy arrays and ``mask`` is ``[0, ..., na - 1]``.
    * ``'<n> f<m>'``: returns the numbers reshaped to ``(n, m)``.
    * ``'<d1> <d2> ... f'``: returns the numbers reshaped to the listed
      dimensions.

    Any other layout is printed and rejected via _check.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    layout_pos = _find_tag(arr, 'layout')
    try:
        symbol_pos = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        symbol_pos = 0
    # The data follows whichever of the two tags comes last.
    data_pos = max(layout_pos, symbol_pos)
    values = _import_data(arr[data_pos].tail)

    layout = arr[layout_pos].text.strip().split()

    if layout[1] == "i" and layout[2][0] == "f":
        nconf = int(layout[0])
        ncol = int(layout[2].lstrip('f'))
        stride = ncol + 1
        columns = [np.array(values[1 + col::stride]) for col in range(ncol)]
        indices = values[0::stride]
        _check(len(indices) == nconf)
        return [name, indices, list(range(ncol)), columns]

    if layout[1][0] == 'f' and len(layout) < 3:
        shape = (int(layout[0]), int(layout[1].lstrip('f')))
        return np.reshape(values, shape)

    if any('f' in token for token in layout):
        # Dimensions are the tokens in front of the bare 'f'; without a bare
        # 'f' everything but the last token is used.
        stop = len(layout) - 1
        for pos, token in enumerate(layout):
            if token == 'f':
                stop = pos
                break
        return np.reshape(values, [int(token) for token in layout[:stop]])

    print(name, layout)
    _check(False)
276
277
def _import_rdata(rd):
    """Parse a replica array and return ``(deltas, name, idx)``; the mask is dropped."""
    parsed = _import_array(rd)
    name, idx, _mask, deltas = parsed
    return deltas, name, idx
281
282
def _import_cdata(cd):
    """Parse a ``cdata`` element and return ``(name, cov, grad)``.

    The first child has to be the ``id``, the second the array whose first
    child has the text ``cov`` and the third the gradient array.
    """
    id_node = cd[0]
    _check(id_node.tag == "id")
    cov_node = cd[1]
    _check(cov_node[0].text.strip() == "cov")
    cov = _import_array(cov_node)
    grad = _import_array(cd[2])
    cname = id_node.text.strip()
    return cname, cov, grad
289
290
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if `full_output` is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    # The content is always read as bytes: lxml refuses unicode strings that
    # carry an XML encoding declaration, which write_pobs always emits.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Validate the layout before any data is interpreted: four description
    # entries, the number of replica and then one array per replica.
    _check(pobs[4].tag == "nr")
    _check(pobs[5].tag == 'array')

    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[r][i] holds the samples of observable i on replica r.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
380
381
382# Reading (and writing) dobs is not yet working properly:
383# we have to loop over root[2:] because each entry is a dobs
384# But maybe this is just a problem with Ben's implementation
385
386# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if `full_output` is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags'
        and 'obsdata'.
    """

    if isinstance(content, str):
        # lxml refuses unicode strings that carry an XML encoding declaration
        # (as written by create_dobs_string), so hand over utf-8 bytes instead.
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(dobs)):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be tested by identity and before the
                # int case, because bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates; sorting keeps the replica order reproducible.
    names = sorted(set(names))

    # The file stores fluctuations, Obs expects the full samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    # The symbol entry is one whitespace separated string; it can only be
    # mapped onto the observables if it holds exactly one token per Obs.
    symbols = symbol.split()
    if len(symbols) == len(res):
        for o, tag in zip(res, symbols):
            o.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
548
549
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    # The file is read exactly once and as bytes: lxml refuses unicode
    # strings that carry an XML encoding declaration.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
594
595
596def _dobsdict_to_xmlstring(d):
597    if isinstance(d, dict):
598        iters = ''
599        for k in d:
600            if k.startswith('#value'):
601                for li in d[k]:
602                    iters += li
603                return iters + '\n'
604            elif k.startswith('#'):
605                for li in d[k]:
606                    iters += li
607                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
608                return iters
609            if isinstance(d[k], dict):
610                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
611            elif isinstance(d[k], str):
612                if len(d[k]) > 100:
613                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
614                else:
615                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
616            elif isinstance(d[k], list):
617                tmps = ''
618                if k in ['edata', 'cdata']:
619                    for i in range(len(d[k])):
620                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k)
621                else:
622                    for i in range(len(d[k])):
623                        tmps += _dobsdict_to_xmlstring(d[k][i])
624                iters += tmps
625            elif isinstance(d[k], (int, float)):
626                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
627            elif not d[k]:
628                return '\n'
629            else:
630                raise Exception('Type', type(d[k]), 'not supported in export!')
631    else:
632        raise Exception('Type', type(d), 'not supported in export!')
633    return iters
634
635
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize `d` with _dobsdict_to_xmlstring and indent the result.

    Every line is prefixed with `space` repeated once per currently open
    tag. A line starting with a closing tag decreases the depth before it is
    written; a line starting with an opening tag that does not also contain
    a closing tag increases the depth for the following lines.
    """
    depth = 0
    pieces = []
    for line in _dobsdict_to_xmlstring(d).split('\n'):
        closing = line.startswith('</')
        if closing:
            depth -= 1
        pieces.append(space * depth + line + '\n')
        if not closing and line.startswith('<') and '</' not in line:
            depth += 1
    return ''.join(pieces)
653
654
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.
    """
    # Work on a copy so that the defaults filled in below do not leak into
    # the dict of the caller.
    enstags = dict(enstags) if enstags else {}
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(mc_names)
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of length 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        # The empty key makes the serializer write the replica arrays
        # directly into the edata element, without a wrapping tag.
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            rows = []
            # Position of the next unwritten configuration of every Obs;
            # -1 marks an Obs whose configurations are exhausted.
            counters = [0 for o in obsl]
            for ci in idx:
                fields = ['%d' % ci]
                for oi, o in enumerate(obsl):
                    entry = '0'
                    if repname in o.idl and counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]]
                        if num != 0:
                            entry = '%1.16e' % (num)
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    fields.append(entry)
                rows.append(' '.join(fields) + ' \n')
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data is independent of the ensembles and therefore
    # written once, also if there is no Monte Carlo ensemble at all.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
828
829
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not already end in '.gz'.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """
    # Hand over a private copy: create_dobs_string fills in missing enstags,
    # which must neither change the caller's dict nor persist between calls.
    enstags = dict(enstags) if enstags else None

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None)
 89def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 90    """Export a list of Obs or structures containing Obs to an xml string
 91    according to the Zeuthen pobs format.
 92
 93    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 94
 95    Parameters
 96    ----------
 97    obsl : list
 98        List of Obs that will be exported.
 99        The Obs inside a structure have to be defined on the same ensemble.
100    name : str
101        The name of the observable.
102    spec : str
103        Optional string that describes the contents of the file.
104    origin : str
105        Specify where the data has its origin.
106    symbol : list
107        A list of symbols that describe the observables to be written. May be empty.
108    enstag : str
109        Enstag that is written to pobs. If None, the ensemble name is used.
110    """
111
112    od = {}
113    ename = obsl[0].e_names[0]
114    names = list(obsl[0].deltas.keys())
115    nr = len(names)
116    onames = [name.replace('|', '') for name in names]
117    for o in obsl:
118        if len(o.e_names) != 1:
119            raise Exception('You try to export dobs to obs!')
120        if o.e_names[0] != ename:
121            raise Exception('You try to export dobs to obs!')
122        if len(o.deltas.keys()) != nr:
123            raise Exception('Incompatible obses in list')
124    od['observables'] = {}
125    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
126    od['observables']['origin'] = {
127        'who': getpass.getuser(),
128        'date': str(datetime.datetime.now())[:-7],
129        'host': socket.gethostname(),
130        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
131    od['observables']['pobs'] = {}
132    pd = od['observables']['pobs']
133    pd['spec'] = spec
134    pd['origin'] = origin
135    pd['name'] = name
136    if enstag:
137        if not isinstance(enstag, str):
138            raise Exception('enstag has to be a string!')
139        pd['enstag'] = enstag
140    else:
141        pd['enstag'] = ename
142    pd['nr'] = '%d' % (nr)
143    pd['array'] = []
144    osymbol = 'cfg'
145    if not isinstance(symbol, list):
146        raise Exception('Symbol has to be a list!')
147    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
148        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
149    for s in symbol:
150        osymbol += ' %s' % s
151    for r in range(nr):
152        ad = {}
153        ad['id'] = onames[r]
154        Nconf = len(obsl[0].deltas[names[r]])
155        layout = '%d i f%d' % (Nconf, len(obsl))
156        ad['layout'] = layout
157        ad['symbol'] = osymbol
158        data = ''
159        for c in range(Nconf):
160            data += '%d ' % obsl[0].idl[names[r]][c]
161            for o in obsl:
162                num = o.deltas[names[r]][c] + o.r_values[names[r]]
163                if num == 0:
164                    data += '0 '
165                else:
166                    data += '%1.16e ' % (num)
167            data += '\n'
168        ad['#data'] = data
169        pd['array'].append(ad)
170
171    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
172    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
def write_pobs( obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True)
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None)
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, the file is read through gzip. If False, it is read as is
        and a warning is issued if the filename ends in '.gz'.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read as raw bytes in both branches, so that the XML parser
    # decodes it according to the encoding given in the XML declaration instead
    # of the locale dependent default encoding of text mode.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Children 0-3 of pobs are description entries and child 4 is 'nr'
    # (checked below); the per-replica arrays start at index 5.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per data column, each collecting that column from every replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True)
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the file contains a symbol entry that holds exactly one whitespace
    separated symbol per observable, the symbols are assigned as tags of the
    returned Obs (the symbol 'None' is translated to the tag None).

    Parameters
    ----------
    content : str or bytes
        XML document containing the data. It is passed unchanged to the XML parser.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    # The first three children of dobs are plain text description entries.
    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested by identity before the int branch:
                # bool is a subclass of int, so False would otherwise be
                # treated as position 0 and '|' would be prepended to the name.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # Map the ensemble name derived from the replica name to the enstag of the file.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations; add the mean to recover the samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            # A replica whose samples all equal the mean does not contribute.
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbol entry is a single string; split it into one symbol per
        # observable instead of indexing its individual characters.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
        else:
            warnings.warn("Number of symbols (%d) does not match the number of observables (%d), tags are not set."
                          % (len(tags), len(res)), UserWarning)
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def read_dobs( fname, noempty=False, full_output=False, gz=True, separator_insertion=True)
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is read and handed to import_dobs_string together with
    noempty, full_output and separator_insertion.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The file is read exactly once, as raw bytes in both branches, so that the
    # XML parser decodes it according to the encoding in the XML declaration.
    # NOTE(review): lxml is documented to reject str input that carries an
    # encoding declaration, which is why bytes are passed on - confirm against
    # the installed lxml version.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def create_dobs_string( obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None)
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written.
        Has to be empty or of the same length as obsl.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict passed in is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.
    """
    # Work on a private copy: missing ensembles are filled in below and the
    # caller's dict (or a shared default) must not be altered by that.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],  # drop the microseconds
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens]
        onames = sorted([n for n in r_names if (n.startswith(ens + '|') or n == ens)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs for this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            data = ''
            # Per Obs position in its own idl; -1 marks an exhausted idl.
            counters = [0 for o in obsl]
            for ci in idx:
                data += '%d ' % ci
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname in o.idl:
                        if counters[oi] < 0:
                            data += '0 '
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]]
                            if num == 0:
                                data += '0 '
                            else:
                                data += '%1.16e ' % (num)
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            # This Obs is not defined on configuration ci.
                            data += '0 '
                    else:
                        data += '0 '
                data += '\n'
            ad['#data'] = data
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the ensembles. It is assembled once,
    # outside of the ensemble loop, so that it is also written if the Obs do not
    # contain any Monte Carlo ensemble (nc is written above in any case).
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True)
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.
    """
    # Use a fresh dict per call instead of a mutable default argument, so that
    # no state can be shared between independent calls.
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.