pyerrors.input.dobs

  1from collections import defaultdict
  2import gzip
  3import lxml.etree as et
  4import getpass
  5import socket
  6import datetime
  7import json
  8import warnings
  9import numpy as np
 10from ..obs import Obs
 11from ..obs import _merge_idx
 12from ..covobs import Covobs
 13from .. import version as pyerrorsversion
 14
 15
 16# Based on https://stackoverflow.com/a/10076823
 17def _etree_to_dict(t):
 18    """ Convert the content of an XML file to a python dict"""
 19    d = {t.tag: {} if t.attrib else None}
 20    children = list(t)
 21    if children:
 22        dd = defaultdict(list)
 23        for dc in map(_etree_to_dict, children):
 24            for k, v in dc.items():
 25                dd[k].append(v)
 26        d = {t.tag: {k: v[0] if len(v) == 1 else v
 27                     for k, v in dd.items()}}
 28    if t.attrib:
 29        d[t.tag].update(('@' + k, v)
 30                        for k, v in t.attrib.items())
 31    if t.text:
 32        text = t.text.strip()
 33        if children or t.attrib:
 34            if text:
 35                d[t.tag]['#data'] = [text]
 36        else:
 37            d[t.tag] = text
 38    return d
 39
 40
 41def _dict_to_xmlstring(d):
 42    if isinstance(d, dict):
 43        iters = ''
 44        for k in d:
 45            if k.startswith('#'):
 46                for la in d[k]:
 47                    iters += la
 48                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 49                return iters
 50            if isinstance(d[k], dict):
 51                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 52            elif isinstance(d[k], str):
 53                if len(d[k]) > 100:
 54                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 55                else:
 56                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 57            elif isinstance(d[k], list):
 58                for i in range(len(d[k])):
 59                    iters += _dict_to_xmlstring(d[k][i])
 60            elif not d[k]:
 61                return '\n'
 62            else:
 63                raise Exception('Type', type(d[k]), 'not supported in export!')
 64    else:
 65        raise Exception('Type', type(d), 'not supported in export!')
 66    return iters
 67
 68
 69def _dict_to_xmlstring_spaces(d, space='  '):
 70    s = _dict_to_xmlstring(d)
 71    o = ''
 72    c = 0
 73    cm = False
 74    for li in s.split('\n'):
 75        if li.startswith('<%s' % ('/')):
 76            c -= 1
 77            cm = True
 78        for i in range(c):
 79            o += space
 80        o += li + '\n'
 81        if li.startswith('<') and not cm:
 82            if not '<%s' % ('/') in li:
 83                c += 1
 84        cm = False
 85    return o
 86
 87
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109
110    Returns
111    -------
112    xml_str : str
113        XML formatted string of the input data
114    """
115
116    od = {}
117    ename = obsl[0].e_names[0]
118    names = list(obsl[0].deltas.keys())
119    nr = len(names)
120    onames = [name.replace('|', '') for name in names]
121    for o in obsl:
122        if len(o.e_names) != 1:
123            raise Exception('You try to export dobs to obs!')
124        if o.e_names[0] != ename:
125            raise Exception('You try to export dobs to obs!')
126        if len(o.deltas.keys()) != nr:
127            raise Exception('Incompatible obses in list')
128    od['observables'] = {}
129    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
130    od['observables']['origin'] = {
131        'who': getpass.getuser(),
132        'date': str(datetime.datetime.now())[:-7],
133        'host': socket.gethostname(),
134        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
135    od['observables']['pobs'] = {}
136    pd = od['observables']['pobs']
137    pd['spec'] = spec
138    pd['origin'] = origin
139    pd['name'] = name
140    if enstag:
141        if not isinstance(enstag, str):
142            raise Exception('enstag has to be a string!')
143        pd['enstag'] = enstag
144    else:
145        pd['enstag'] = ename
146    pd['nr'] = '%d' % (nr)
147    pd['array'] = []
148    osymbol = 'cfg'
149    if not isinstance(symbol, list):
150        raise Exception('Symbol has to be a list!')
151    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
152        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
153    for s in symbol:
154        osymbol += ' %s' % s
155    for r in range(nr):
156        ad = {}
157        ad['id'] = onames[r]
158        Nconf = len(obsl[0].deltas[names[r]])
159        layout = '%d i f%d' % (Nconf, len(obsl))
160        ad['layout'] = layout
161        ad['symbol'] = osymbol
162        data = ''
163        for c in range(Nconf):
164            data += '%d ' % obsl[0].idl[names[r]][c]
165            for o in obsl:
166                num = o.deltas[names[r]][c] + o.r_values[names[r]]
167                if num == 0:
168                    data += '0 '
169                else:
170                    data += '%1.16e ' % (num)
171            data += '\n'
172        ad['#data'] = data
173        pd['array'].append(ad)
174
175    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
176    return rs
177
178
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
224
225
226def _import_data(string):
227    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")
228
229
230def _check(condition):
231    if not condition:
232        raise Exception("XML file format not supported")
233
234
235class _NoTagInDataError(Exception):
236    """Raised when tag is not in data"""
237    def __init__(self, tag):
238        self.tag = tag
239        super().__init__('Tag %s not in data!' % (self.tag))
240
241
242def _find_tag(dat, tag):
243    for i in range(len(dat)):
244        if dat[i].tag == tag:
245            return i
246    raise _NoTagInDataError(tag)
247
248
def _import_array(arr):
    """Read the numbers stored in an ``array`` element.

    The numbers are taken from the text that follows the ``symbol`` child if
    that child comes after ``layout``, otherwise from the text that follows
    ``layout``. The layout string determines the return value:

    * ``'N i fM'``: a list ``[name, idx, mask, data]`` where ``idx`` holds the
      first number of each row, ``mask`` is ``[0, ..., M-1]`` and ``data`` is a
      list with one numpy array per column. The number of rows has to be N.
    * ``'N fM'`` (two tokens): a numpy array of shape (N, M).
    * any other layout that contains an 'f': a numpy array whose shape is given
      by the integers in front of the first token that equals 'f' (or in front
      of the last token if no token equals 'f').

    Every other layout is printed and rejected via _check.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    layout_pos = _find_tag(arr, 'layout')
    try:
        symbol_pos = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        symbol_pos = 0
    data_pos = symbol_pos if symbol_pos > layout_pos else layout_pos
    tmp = _import_data(arr[data_pos].tail)

    m = arr[layout_pos].text.strip().split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        stride = na + 1  # each row consists of one index and na values
        mask = list(range(na))
        _dat = [np.array(tmp[1 + a::stride]) for a in range(na)]
        idx = tmp[0::stride]
        _check(len(idx) == nc)
        return [name, idx, mask, _dat]
    if m[1][0] == 'f' and len(m) < 3:
        return np.reshape(tmp, (int(m[0]), int(m[1].lstrip('f'))))
    if any('f' in s for s in m):
        si = next((pos for pos, token in enumerate(m) if token == 'f'), len(m) - 1)
        return np.reshape(tmp, [int(token) for token in m[:si]])
    print(name, m)
    _check(False)
285
286
def _import_rdata(rd):
    """Read one replica array and return ``(deltas, name, idx)``.

    The mask returned by _import_array is discarded.
    """
    name, idx, _, deltas = _import_array(rd)
    return deltas, name, idx
290
291
def _import_cdata(cd):
    """Read a ``cdata`` element and return ``(id, cov, grad)``.

    The first child has to be the ``id``, the second one the array named
    'cov' and the third one is read as the gradient array.
    """
    id_element = cd[0]
    _check(id_element.tag == "id")
    cov_element = cd[1]
    _check(cov_element[0].text.strip() == "cov")
    cov = _import_array(cov_element)
    grad = _import_array(cd[2])
    return id_element.text.strip(), cov, grad
298
299
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and it is missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes, exactly as in the gzipped case: the parser rejects str input
        # that carries an encoding declaration, which is what write_pobs produces.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # The first five children of pobs are descriptive, the replica arrays start at index 5.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[r][i] holds the samples of observable i on replica r
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
397
398
399# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the first array of the file contains a symbol entry with one
    whitespace separated symbol per observable, the symbols are used as tags
    of the returned Obs (the symbol 'None' is translated to None).

    Parameters
    ----------
    content : str or bytes
        XML string containing the data. A str is encoded as utf-8 before it is parsed.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if isinstance(content, str):
        # The parser rejects str input that carries an encoding declaration
        # (as written by create_dobs_string), bytes are accepted.
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    # The first array holds the central values, one per observable.
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # None and False have to be tested before the int branch, since False is an int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations, the samples are obtained by adding the central values.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            if noempty:
                # Skip replica on which this observable is constant and equal to its mean.
                h = np.unique(deltad[name][i])
                if len(h) == 1 and np.all(h == mean[i]):
                    continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # symbol is a single string, the individual symbols are separated by whitespace.
        # Tags are only set if there is exactly one symbol per observable.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
569
570
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is passed to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and it is missing.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes, exactly as in the gzipped case: the parser rejects str input
        # that carries an encoding declaration, which is what write_dobs produces.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
619
620
621def _dobsdict_to_xmlstring(d):
622    if isinstance(d, dict):
623        iters = ''
624        for k in d:
625            if k.startswith('#value'):
626                for li in d[k]:
627                    iters += li
628                return iters + '\n'
629            elif k.startswith('#'):
630                for li in d[k]:
631                    iters += li
632                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
633                return iters
634            if isinstance(d[k], dict):
635                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
636            elif isinstance(d[k], str):
637                if len(d[k]) > 100:
638                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
639                else:
640                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
641            elif isinstance(d[k], list):
642                tmps = ''
643                if k in ['edata', 'cdata']:
644                    for i in range(len(d[k])):
645                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k)
646                else:
647                    for i in range(len(d[k])):
648                        tmps += _dobsdict_to_xmlstring(d[k][i])
649                iters += tmps
650            elif isinstance(d[k], (int, float)):
651                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
652            elif not d[k]:
653                return '\n'
654            else:
655                raise Exception('Type', type(d[k]), 'not supported in export!')
656    else:
657        raise Exception('Type', type(d), 'not supported in export!')
658    return iters
659
660
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize ``d`` with _dobsdict_to_xmlstring and indent the result.

    Every line is prefixed with ``space`` once per nesting level. A line that
    starts with an opening tag and contains no closing tag increases the level
    for the following lines, a line that starts with a closing tag decreases
    it before the line itself is written.
    """
    depth = 0
    indented = []
    for line in _dobsdict_to_xmlstring(d).split('\n'):
        closing = line.startswith('</')
        if closing:
            depth -= 1
        indented.append(space * depth + line + '\n')
        if line.startswith('<') and not closing and '</' not in line:
            depth += 1
    return ''.join(indented)
678
679
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data
    """
    # Work on a copy: the defaults filled in below must not leak into the caller's dict.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    # The insertion order of the keys fixes the order of the elements in the xml output.
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)

    # One edata block per Monte Carlo ensemble, one array per replica of the ensemble.
    pd['edata'] = []
    for ens_name in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens_name]
        onames = sorted([n for n in r_names if (n.startswith(ens_name + '|') or n == ens_name)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            # counters[oi] is the position of the next unwritten configuration of
            # obsl[oi]; it is set to -1 once all its configurations are written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            rows = []
            for ci in idx:
                row = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    if repname not in o.idl:
                        # Obs is not defined on this replica at all.
                        num = 0
                    elif counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]] + offsets[oi]
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Configuration is missing for this Obs, only the offset is written.
                        num = offsets[oi]
                    row.append('0 ' if num == 0 else '%1.16e ' % (num))
                row.append('\n')
                rows.append(''.join(row))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the ensembles and is therefore assembled
    # once, after the ensemble loop, also if the Obs contain no Monte Carlo data.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            cov_rows = []
            for i in range(ncov):
                cov_row = ''
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    cov_row += '0 ' if val == 0 else '%1.14e ' % (val)
                cov_rows.append(cov_row + '\n')
            covd['#data'] = ''.join(cov_rows)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        grad_entries = []
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    grad_entries.append('%1.14e ' % (val) if val != 0 else '0 ')
                else:
                    grad_entries.append('0 ')
        gradd['#data'] = ''.join(grad_entries)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
867
868
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended if gz is True and it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.

    Returns
    -------
    None
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 90    """Export a list of Obs or structures containing Obs to an xml string
 91    according to the Zeuthen pobs format.
 92
 93    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 94
 95    Parameters
 96    ----------
 97    obsl : list
 98        List of Obs that will be exported.
 99        The Obs inside a structure have to be defined on the same ensemble.
100    name : str
101        The name of the observable.
102    spec : str
103        Optional string that describes the contents of the file.
104    origin : str
105        Specify where the data has its origin.
106    symbol : list
107        A list of symbols that describe the observables to be written. May be empty.
108    enstag : str
109        Enstag that is written to pobs. If None, the ensemble name is used.
110
111    Returns
112    -------
113    xml_str : str
114        XML formatted string of the input data
115    """
116
117    od = {}
118    ename = obsl[0].e_names[0]
119    names = list(obsl[0].deltas.keys())
120    nr = len(names)
121    onames = [name.replace('|', '') for name in names]
122    for o in obsl:
123        if len(o.e_names) != 1:
124            raise Exception('You try to export dobs to obs!')
125        if o.e_names[0] != ename:
126            raise Exception('You try to export dobs to obs!')
127        if len(o.deltas.keys()) != nr:
128            raise Exception('Incompatible obses in list')
129    od['observables'] = {}
130    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
131    od['observables']['origin'] = {
132        'who': getpass.getuser(),
133        'date': str(datetime.datetime.now())[:-7],
134        'host': socket.gethostname(),
135        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
136    od['observables']['pobs'] = {}
137    pd = od['observables']['pobs']
138    pd['spec'] = spec
139    pd['origin'] = origin
140    pd['name'] = name
141    if enstag:
142        if not isinstance(enstag, str):
143            raise Exception('enstag has to be a string!')
144        pd['enstag'] = enstag
145    else:
146        pd['enstag'] = ename
147    pd['nr'] = '%d' % (nr)
148    pd['array'] = []
149    osymbol = 'cfg'
150    if not isinstance(symbol, list):
151        raise Exception('Symbol has to be a list!')
152    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
153        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
154    for s in symbol:
155        osymbol += ' %s' % s
156    for r in range(nr):
157        ad = {}
158        ad['id'] = onames[r]
159        Nconf = len(obsl[0].deltas[names[r]])
160        layout = '%d i f%d' % (Nconf, len(obsl))
161        ad['layout'] = layout
162        ad['symbol'] = osymbol
163        data = ''
164        for c in range(Nconf):
165            data += '%d ' % obsl[0].idl[names[r]][c]
166            for o in obsl:
167                num = o.deltas[names[r]][c] + o.r_values[names[r]]
168                if num == 0:
169                    data += '0 '
170                else:
171                    data += '%1.16e ' % (num)
172            data += '\n'
173        ad['#data'] = data
174        pd['array'].append(ad)
175
176    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
177    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
Returns
  • xml_str (str): XML formatted string of the input data
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
Returns
  • None
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is always read as bytes, so that the XML parser decodes it
    # according to the XML declaration instead of the locale's default encoding.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Children 0-4 of pobs hold the description and 'nr';
    # every further child is the data array of one replica.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['obsdata'] = res
    return retd

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    The symbols stored in the file, if there is exactly one per observable,
    are assigned to the tags of the imported Obs.

    Parameters
    ----------
    content : str
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for entry in list(dobs)[6:]:
        if entry.tag == "edata":
            _check(entry[0].tag == "enstag")
            ename = entry[0].text.strip()
            e_names.append(ename)
            _check(entry[1].tag == "nr")
            R = int(entry[1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(entry[i])
                # The bool cases have to be tested before the int case, since
                # bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif entry.tag == "cdata":
            cname, cov, grad = _import_cdata(entry)
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations around the mean; add the mean back.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose entries all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols are stored as one whitespace separated string. Tags are
        # only assigned if there is exactly one symbol per observable.
        tags = symbol.split()
        if len(tags) == len(res):
            for o, tag in zip(res, tags):
                o.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['enstags'] = enstags
    retd['obsdata'] = res
    return retd

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is passed to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in neither
        '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is always read as bytes, so that the XML parser decodes it
    # according to the XML declaration instead of the locale's default encoding.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data
    """
    def _fmt(num):
        # Exact zeros are written in short form to save space.
        return '0 ' if num == 0 else '%1.16e ' % (num)

    # Work on a copy, so that the defaults added below do not leak into the
    # dict of the caller.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens_name in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens_name]
        onames = sorted([n for n in r_names if (n.startswith(ens_name + '|') or n == ens_name)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Union of the configurations on which any of the Obs is defined.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            parts = []
            # counters[oi] is the position of the next unwritten configuration
            # of Obs oi; -1 marks that all of its configurations are written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            for ci in idx:
                parts.append('%d ' % ci)
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname not in o.idl:
                        parts.append('0 ')
                        continue
                    if counters[oi] < 0:
                        parts.append(_fmt(offsets[oi]))
                        continue
                    if o.idl[repname][counters[oi]] == ci:
                        parts.append(_fmt(o.deltas[repname][counters[oi]] + offsets[oi]))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Obs oi is not defined on this configuration.
                        parts.append(_fmt(offsets[oi]))
                parts.append('\n')
            ad['#data'] = ''.join(parts)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the Monte Carlo ensembles and is
    # therefore assembled once, outside of the loop above. This also covers
    # the case of Obs that do not depend on any Monte Carlo ensemble.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
Returns
  • xml_str (str): XML string generated from the data
def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. If it ends with neither '.xml' nor '.gz',
        '.xml' is appended. If gz is True and the name does not end with '.gz',
        '.gz' is appended as well.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.

    Returns
    -------
    None
    """
    if enstags is None:
        enstags = {}

    # Build the complete XML document first, so that no file is created
    # (or truncated) if the serialization itself fails.
    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The context managers guarantee that the file handle is closed (and the
    # gzip stream finalized) even if the write raises.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag}. Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.
Returns
  • None