pyerrors.input.dobs

  1from collections import defaultdict
  2import gzip
  3import lxml.etree as et
  4import getpass
  5import socket
  6import datetime
  7import json
  8import warnings
  9import numpy as np
 10from ..obs import Obs
 11from ..obs import _merge_idx
 12from ..covobs import Covobs
 13from .. import version as pyerrorsversion
 14
 15
 16# Based on https://stackoverflow.com/a/10076823
 17def _etree_to_dict(t):
 18    """ Convert the content of an XML file to a python dict"""
 19    d = {t.tag: {} if t.attrib else None}
 20    children = list(t)
 21    if children:
 22        dd = defaultdict(list)
 23        for dc in map(_etree_to_dict, children):
 24            for k, v in dc.items():
 25                dd[k].append(v)
 26        d = {t.tag: {k: v[0] if len(v) == 1 else v
 27                     for k, v in dd.items()}}
 28    if t.attrib:
 29        d[t.tag].update(('@' + k, v)
 30                        for k, v in t.attrib.items())
 31    if t.text:
 32        text = t.text.strip()
 33        if children or t.attrib:
 34            if text:
 35                d[t.tag]['#data'] = [text]
 36        else:
 37            d[t.tag] = text
 38    return d
 39
 40
 41def _dict_to_xmlstring(d):
 42    if isinstance(d, dict):
 43        iters = ''
 44        for k in d:
 45            if k.startswith('#'):
 46                for la in d[k]:
 47                    iters += la
 48                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 49                return iters
 50            if isinstance(d[k], dict):
 51                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 52            elif isinstance(d[k], str):
 53                if len(d[k]) > 100:
 54                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 55                else:
 56                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 57            elif isinstance(d[k], list):
 58                for i in range(len(d[k])):
 59                    iters += _dict_to_xmlstring(d[k][i])
 60            elif not d[k]:
 61                return '\n'
 62            else:
 63                raise Exception('Type', type(d[k]), 'not supported in export!')
 64    else:
 65        raise Exception('Type', type(d), 'not supported in export!')
 66    return iters
 67
 68
 69def _dict_to_xmlstring_spaces(d, space='  '):
 70    s = _dict_to_xmlstring(d)
 71    o = ''
 72    c = 0
 73    cm = False
 74    for li in s.split('\n'):
 75        if li.startswith('<%s' % ('/')):
 76            c -= 1
 77            cm = True
 78        for i in range(c):
 79            o += space
 80        o += li + '\n'
 81        if li.startswith('<') and not cm:
 82            if not '<%s' % ('/') in li:
 83                c += 1
 84        cm = False
 85    return o
 86
 87
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109    """
110
111    od = {}
112    ename = obsl[0].e_names[0]
113    names = list(obsl[0].deltas.keys())
114    nr = len(names)
115    onames = [name.replace('|', '') for name in names]
116    for o in obsl:
117        if len(o.e_names) != 1:
118            raise Exception('You try to export dobs to obs!')
119        if o.e_names[0] != ename:
120            raise Exception('You try to export dobs to obs!')
121        if len(o.deltas.keys()) != nr:
122            raise Exception('Incompatible obses in list')
123    od['observables'] = {}
124    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
125    od['observables']['origin'] = {
126        'who': getpass.getuser(),
127        'date': str(datetime.datetime.now())[:-7],
128        'host': socket.gethostname(),
129        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
130    od['observables']['pobs'] = {}
131    pd = od['observables']['pobs']
132    pd['spec'] = spec
133    pd['origin'] = origin
134    pd['name'] = name
135    if enstag:
136        if not isinstance(enstag, str):
137            raise Exception('enstag has to be a string!')
138        pd['enstag'] = enstag
139    else:
140        pd['enstag'] = ename
141    pd['nr'] = '%d' % (nr)
142    pd['array'] = []
143    osymbol = 'cfg'
144    if not isinstance(symbol, list):
145        raise Exception('Symbol has to be a list!')
146    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
147        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
148    for s in symbol:
149        osymbol += ' %s' % s
150    for r in range(nr):
151        ad = {}
152        ad['id'] = onames[r]
153        Nconf = len(obsl[0].deltas[names[r]])
154        layout = '%d i f%d' % (Nconf, len(obsl))
155        ad['layout'] = layout
156        ad['symbol'] = osymbol
157        data = ''
158        for c in range(Nconf):
159            data += '%d ' % obsl[0].idl[names[r]][c]
160            for o in obsl:
161                num = o.deltas[names[r]][c] + o.r_values[names[r]]
162                if num == 0:
163                    data += '0 '
164                else:
165                    data += '%1.16e ' % (num)
166            data += '\n'
167        ad['#data'] = data
168        pd['array'].append(ad)
169
170    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
171    return rs
172
173
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not end in '.gz'.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
215
216
def _import_data(string):
    """Parse a whitespace separated sequence of numbers into a flat list.

    A '+' that directly follows a space is dropped, the tokens are joined
    with commas and the result is decoded as a JSON array.
    """
    tokens = string.replace(' +', ' ').split()
    return json.loads("[%s]" % ",".join(tokens))
219
220
def _check(condition):
    """Raise an Exception if `condition` is falsy; used to validate the XML layout."""
    if condition:
        return
    raise Exception("XML file format not supported")
224
225
class _NoTagInDataError(Exception):
    """Raised when tag is not in data

    The missing tag is available as the attribute `tag`.
    """
    def __init__(self, tag):
        message = 'Tag %s not in data!' % (tag)
        self.tag = tag
        super().__init__(message)
231
232
def _find_tag(dat, tag):
    """Return the position of the first entry of `dat` whose `.tag` equals `tag`.

    Raises
    ------
    _NoTagInDataError
        If no entry carries the requested tag.
    """
    for position, entry in enumerate(dat):
        if entry.tag == tag:
            return position
    raise _NoTagInDataError(tag)
238
239
def _import_array(arr):
    """Read the content of an <array> element.

    The numerical data is taken from the text that follows the <layout>
    child or, if a <symbol> child comes after it, the text that follows the
    <symbol> child. The layout string decides how it is interpreted:

    - '<nc> i f<na>': rows made of an integer index followed by `na`
      values. Returns ``[name, indices, mask, data]`` where `data` is a
      list of `na` numpy arrays and `mask` is ``list(range(na))``.
    - '<n> f<m>': returns the data reshaped to ``(n, m)``.
    - '<n1> <n2> ... f': returns the data reshaped to ``[n1, n2, ...]``.

    Raises
    ------
    Exception
        If the layout is not recognised or if the number of rows does not
        match the layout.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    index = _find_tag(arr, 'layout')
    try:
        sindex = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        sindex = 0
    # The data follows whichever of the two header tags comes last.
    tmp = _import_data(arr[max(sindex, index)].tail)

    m = arr[index].text.strip().split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        stride = na + 1
        indices = tmp[0::stride]
        mask = list(range(na))
        _dat = [np.array(tmp[1 + a::stride]) for a in range(na)]
        _check(len(indices) == nc)
        return [name, indices, mask, _dat]
    elif m[1][0] == 'f' and len(m) < 3:
        sh = (int(m[0]), int(m[1].lstrip('f')))
        return np.reshape(tmp, sh)
    elif any('f' in s for s in m):
        # Dimensions are the entries in front of the bare 'f' marker; without
        # such a marker all entries but the last one are used.
        si = m.index('f') if 'f' in m else len(m) - 1
        sh = [int(entry) for entry in m[:si]]
        return np.reshape(tmp, sh)
    else:
        # Report the offending array in the exception instead of printing to stdout.
        raise Exception("XML file format not supported: layout %r of array %r is not recognised" % (' '.join(m), name))
276
277
def _import_rdata(rd):
    """Read one replica <array> and return ``(deltas, name, idx)``.

    The mask that _import_array returns as third entry is discarded.
    """
    name, idx, _, deltas = _import_array(rd)
    return deltas, name, idx
281
282
def _import_cdata(cd):
    """Read a <cdata> element and return ``(name, cov, grad)``.

    The first child has to be the <id>, the second child an array whose
    first entry has the text 'cov' and the third child the gradient array.
    """
    id_node = cd[0]
    _check(id_node.tag == "id")
    cov_node = cd[1]
    _check(cov_node[0].text.strip() == "cov")
    covariance = _import_array(cov_node)
    gradient = _import_array(cd[2])
    return id_node.text.strip(), covariance, gradient
289
290
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not end in '.gz'.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Raises
    ------
    Exception
        If the file does not have the expected layout or if
        `separator_insertion` has an unsupported type.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read as bytes in both cases: the files start with an XML
    # declaration that names the encoding, and lxml refuses to parse unicode
    # strings that carry such a declaration.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Children 0-4 of <pobs> are the header, the replica arrays start at index 5.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[r][i] holds the data of observable i on replica r.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
380
381
382# Reading (and writing) dobs is not yet working properly:
383# we have to loop over root[2:] because each entry is a dobs
384# But maybe this is just a problem with Ben's implementation
385
386# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    The symbols stored in the file, if any, are split at whitespace and
    assigned as tags to the imported Obs (the symbol 'None' is mapped to None).

    Parameters
    ----------
    content : str or bytes
        XML document containing the data. It is handed to
        lxml.etree.fromstring; use bytes if the document starts with an XML
        declaration that specifies an encoding.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Raises
    ------
    Exception
        If the document does not have the expected layout or if
        `separator_insertion` has an unsupported type.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(dobs)):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be handled before the int case since
                # bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates; sorting keeps the replica order deterministic.
    names = sorted(set(names))

    # The file stores fluctuations; add the mean back before the Obs are constructed.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose values all equal the mean does not contribute to this Obs.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols are stored as one whitespace separated string, one entry per Obs.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
        else:
            warnings.warn("Found %d symbols for %d observables, tags are not set." % (len(tags), len(res)), UserWarning)
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
548
549
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is read and passed on to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not end in '.gz'.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read exactly once and as bytes: the files start with an
    # XML declaration that names the encoding, and lxml refuses to parse
    # unicode strings that carry such a declaration.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
594
595
def _dobsdict_to_xmlstring(d):
    """Serialize a nested dict describing a dobs file to an (unindented) XML string.

    Keys become tag names. Supported values are dicts (nested elements),
    strings (element text; strings longer than 100 characters are put on
    their own line), ints and floats (written via str) and lists. The
    entries of lists stored under 'edata' or 'cdata' are each wrapped in a
    tag of that name, the entries of all other lists are serialized in place.

    Two kinds of keys end the serialization of the current dict early:
    a key starting with '#value' appends its content followed by a newline,
    any other key starting with '#' appends its content and wraps everything
    collected so far in an <array> element. Any other falsy value makes the
    function return a bare newline.

    Raises
    ------
    Exception
        If `d` is not a dict or contains a value of an unsupported type.
    """
    if not isinstance(d, dict):
        raise Exception('Type', type(d), 'not supported in export!')

    parts = []
    for key, value in d.items():
        if key.startswith('#value'):
            return ''.join(parts) + ''.join(value) + '\n'
        if key.startswith('#'):
            return '<array>\n' + ''.join(parts) + ''.join(value) + '</array>\n'

        if isinstance(value, dict):
            parts.append('<%s>\n' % key + _dobsdict_to_xmlstring(value) + '</%s>\n' % key)
        elif isinstance(value, str):
            if len(value) > 100:
                parts.append('<%s>\n ' % key + value + ' \n</%s>\n' % key)
            else:
                parts.append('<%s> ' % key + value + ' </%s>\n' % key)
        elif isinstance(value, list):
            if key in ('edata', 'cdata'):
                parts.extend('<%s>\n' % key + _dobsdict_to_xmlstring(entry) + '</%s>\n' % key
                             for entry in value)
            else:
                parts.extend(_dobsdict_to_xmlstring(entry) for entry in value)
        elif isinstance(value, (int, float)):
            parts.append('<%s> ' % key + str(value) + ' </%s>\n' % key)
        elif not value:
            return '\n'
        else:
            raise Exception('Type', type(value), 'not supported in export!')
    return ''.join(parts)
634
635
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize `d` via _dobsdict_to_xmlstring and indent the result.

    Every line is prefixed with `space` repeated once per currently open
    element. A line that starts with a closing tag reduces the depth before
    it is written; a line that starts with '<' and contains no closing tag
    increases the depth for the following lines.
    """
    level = 0
    lines = []
    for raw in _dobsdict_to_xmlstring(d).split('\n'):
        closes = raw.startswith('</')
        if closes:
            level -= 1
        # A non-positive level yields no indentation at all.
        lines.append(space * level + raw + '\n')
        opens = raw.startswith('<') and '</' not in raw
        if opens and not closes:
            level += 1
    return ''.join(lines)
653
654
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
        The entries are converted to strings and joined with single spaces.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If `symbol` has the wrong type or length or if two Obs carry
        different covariance matrices under the same name.
    """
    # Work on a copy: neither the caller's dict nor the shared default may be modified.
    enstags = dict(enstags) if enstags else {}

    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for ens in mc_names:
        enstags.setdefault(ens, ens)
    ne = len(mc_names)
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    # The insertion order of the keys determines the order of the XML elements.
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        # Every entry is formatted as a string so that e.g. None is written as 'None'.
        pd['array']['symbol'] = ' '.join('%s' % s for s in symbol)

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens]
        onames = sorted([n for n in r_names if (n.startswith(ens + '|') or n == ens)])
        ed['nr'] = len(onames)
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Union of the configurations on which any of the Obs is defined.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            ad['layout'] = '%d i f%d' % (len(idx), len(obsl))
            rows = []
            # Position of the next unwritten configuration of each Obs;
            # -1 marks an Obs whose configurations are exhausted.
            counters = [0 for o in obsl]
            for ci in idx:
                fields = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    num = 0
                    if repname in o.idl and counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]]
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    # Configurations on which an Obs is not defined are padded with zeros.
                    fields.append('0 ' if num == 0 else '%1.16e ' % (num))
                rows.append(''.join(fields) + '\n')
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the ensembles and is written once,
    # also if none of the Obs has a Monte Carlo ensemble.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
826
827
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends
        neither in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True
        and the name does not end in '.gz'.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None)
 89def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 90    """Export a list of Obs or structures containing Obs to an xml string
 91    according to the Zeuthen pobs format.
 92
 93    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 94
 95    Parameters
 96    ----------
 97    obsl : list
 98        List of Obs that will be exported.
 99        The Obs inside a structure have to be defined on the same ensemble.
100    name : str
101        The name of the observable.
102    spec : str
103        Optional string that describes the contents of the file.
104    origin : str
105        Specify where the data has its origin.
106    symbol : list
107        A list of symbols that describe the observables to be written. May be empty.
108    enstag : str
109        Enstag that is written to pobs. If None, the ensemble name is used.
110    """
111
112    od = {}
113    ename = obsl[0].e_names[0]
114    names = list(obsl[0].deltas.keys())
115    nr = len(names)
116    onames = [name.replace('|', '') for name in names]
117    for o in obsl:
118        if len(o.e_names) != 1:
119            raise Exception('You try to export dobs to obs!')
120        if o.e_names[0] != ename:
121            raise Exception('You try to export dobs to obs!')
122        if len(o.deltas.keys()) != nr:
123            raise Exception('Incompatible obses in list')
124    od['observables'] = {}
125    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
126    od['observables']['origin'] = {
127        'who': getpass.getuser(),
128        'date': str(datetime.datetime.now())[:-7],
129        'host': socket.gethostname(),
130        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
131    od['observables']['pobs'] = {}
132    pd = od['observables']['pobs']
133    pd['spec'] = spec
134    pd['origin'] = origin
135    pd['name'] = name
136    if enstag:
137        if not isinstance(enstag, str):
138            raise Exception('enstag has to be a string!')
139        pd['enstag'] = enstag
140    else:
141        pd['enstag'] = ename
142    pd['nr'] = '%d' % (nr)
143    pd['array'] = []
144    osymbol = 'cfg'
145    if not isinstance(symbol, list):
146        raise Exception('Symbol has to be a list!')
147    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
148        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
149    for s in symbol:
150        osymbol += ' %s' % s
151    for r in range(nr):
152        ad = {}
153        ad['id'] = onames[r]
154        Nconf = len(obsl[0].deltas[names[r]])
155        layout = '%d i f%d' % (Nconf, len(obsl))
156        ad['layout'] = layout
157        ad['symbol'] = osymbol
158        data = ''
159        for c in range(Nconf):
160            data += '%d ' % obsl[0].idl[names[r]][c]
161            for o in obsl:
162                num = o.deltas[names[r]][c] + o.r_values[names[r]]
163                if num == 0:
164                    data += '0 '
165                else:
166                    data += '%1.16e ' % (num)
167            data += '\n'
168        ad['#data'] = data
169        pd['array'].append(ad)
170
171    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
172    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
def write_pobs( obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True)
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers guarantee that the file handle is closed even if
    # writing fails part-way through.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None)
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, the file is read through gzip. If False, it is read as a plain file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, exactly like the gzip branch: lxml refuses to parse a
        # str that starts with an XML encoding declaration, and the files
        # written by write_pobs always carry one.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # The first five children of <pobs> are metadata (four description fields
    # and 'nr', checked below); every later child holds the data of one replica.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per stored observable, each collecting its samples from all replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True)
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags'
        and 'obsdata'.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be tested by identity and before the
                # int branch, because bool is a subclass of int: otherwise
                # False would be treated as position 0 and prepend a '|'.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # Remember which enstag from the file belongs to the ensemble
                # name that results from the (possibly modified) replica name.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations; add the mean to recover the samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            # A replica whose samples all equal the mean carries no information
            # for this observable and is dropped if noempty is set.
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols arrive as a single string; create_dobs_string joins them
        # with blanks, so split on whitespace to get one tag per observable
        # instead of indexing single characters of the string.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
        else:
            warnings.warn("Found %d symbols for %d observables, tags are not set." % (len(tags), len(res)), UserWarning)
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def read_dobs( fname, noempty=False, full_output=False, gz=True, separator_insertion=True)
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read as raw bytes in both branches: lxml refuses to parse
    # a str that starts with an XML encoding declaration, and the files written
    # by write_dobs always carry one.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def create_dobs_string( obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={})
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML declaration followed by the serialized observables.

    Raises
    ------
    Exception
        If symbol is non-empty and not a list of length len(obsl), or if two Obs
        carry different covariance matrices under the same name.
    """
    # Work on a copy: the missing entries are filled in below and must end up
    # neither in the caller's dict nor in the shared default argument.
    enstags = dict(enstags)

    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)

    # One <edata> entry per Monte Carlo ensemble, holding one array per replica.
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Configurations of this replica merged over all Obs; an Obs that is
            # not defined on a configuration contributes a 0 in that row.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            data = ''
            # Position of the next unwritten configuration for every Obs;
            # -1 marks an Obs whose configurations are exhausted.
            counters = [0 for o in obsl]
            for ci in idx:
                data += '%d ' % ci
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname in o.idl:
                        if counters[oi] < 0:
                            data += '0 '
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]]
                            if num == 0:
                                data += '0 '
                            else:
                                data += '%1.16e ' % (num)
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            data += '0 '
                    else:
                        data += '0 '
                data += '\n'
            ad['#data'] = data
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance section does not depend on the ensembles, so it is built
    # once after the ensemble loop. This also writes it when the list contains
    # no Monte Carlo ensembles at all.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        # Gradient of every Obs with respect to this covobs; 0 for an Obs that
        # does not depend on it.
        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True)
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.
    """

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers guarantee that the file handle is closed even if
    # writing fails part-way through.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.