pyerrors.input.dobs

  1from collections import defaultdict
  2import gzip
  3import lxml.etree as et
  4import getpass
  5import socket
  6import datetime
  7import json
  8import warnings
  9import numpy as np
 10from ..obs import Obs
 11from ..obs import _merge_idx
 12from ..covobs import Covobs
 13from .. import version as pyerrorsversion
 14
 15
 16# Based on https://stackoverflow.com/a/10076823
 17def _etree_to_dict(t):
 18    """ Convert the content of an XML file to a python dict"""
 19    d = {t.tag: {} if t.attrib else None}
 20    children = list(t)
 21    if children:
 22        dd = defaultdict(list)
 23        for dc in map(_etree_to_dict, children):
 24            for k, v in dc.items():
 25                dd[k].append(v)
 26        d = {t.tag: {k: v[0] if len(v) == 1 else v
 27                     for k, v in dd.items()}}
 28    if t.attrib:
 29        d[t.tag].update(('@' + k, v)
 30                        for k, v in t.attrib.items())
 31    if t.text:
 32        text = t.text.strip()
 33        if children or t.attrib:
 34            if text:
 35                d[t.tag]['#data'] = [text]
 36        else:
 37            d[t.tag] = text
 38    return d
 39
 40
 41def _dict_to_xmlstring(d):
 42    if isinstance(d, dict):
 43        iters = ''
 44        for k in d:
 45            if k.startswith('#'):
 46                for la in d[k]:
 47                    iters += la
 48                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 49                return iters
 50            if isinstance(d[k], dict):
 51                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 52            elif isinstance(d[k], str):
 53                if len(d[k]) > 100:
 54                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 55                else:
 56                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 57            elif isinstance(d[k], list):
 58                for i in range(len(d[k])):
 59                    iters += _dict_to_xmlstring(d[k][i])
 60            elif not d[k]:
 61                return '\n'
 62            else:
 63                raise Exception('Type', type(d[k]), 'not supported in export!')
 64    else:
 65        raise Exception('Type', type(d), 'not supported in export!')
 66    return iters
 67
 68
 69def _dict_to_xmlstring_spaces(d, space='  '):
 70    s = _dict_to_xmlstring(d)
 71    o = ''
 72    c = 0
 73    cm = False
 74    for li in s.split('\n'):
 75        if li.startswith('<%s' % ('/')):
 76            c -= 1
 77            cm = True
 78        for i in range(c):
 79            o += space
 80        o += li + '\n'
 81        if li.startswith('<') and not cm:
 82            if not '<%s' % ('/') in li:
 83                c += 1
 84        cm = False
 85    return o
 86
 87
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109
110    Returns
111    -------
112    xml_str : str
113        XML formatted string of the input data
114    """
115
116    od = {}
117    ename = obsl[0].e_names[0]
118    names = list(obsl[0].deltas.keys())
119    nr = len(names)
120    onames = [name.replace('|', '') for name in names]
121    for o in obsl:
122        if len(o.e_names) != 1:
123            raise Exception('You try to export dobs to obs!')
124        if o.e_names[0] != ename:
125            raise Exception('You try to export dobs to obs!')
126        if len(o.deltas.keys()) != nr:
127            raise Exception('Incompatible obses in list')
128    od['observables'] = {}
129    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
130    od['observables']['origin'] = {
131        'who': getpass.getuser(),
132        'date': str(datetime.datetime.now())[:-7],
133        'host': socket.gethostname(),
134        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
135    od['observables']['pobs'] = {}
136    pd = od['observables']['pobs']
137    pd['spec'] = spec
138    pd['origin'] = origin
139    pd['name'] = name
140    if enstag:
141        if not isinstance(enstag, str):
142            raise Exception('enstag has to be a string!')
143        pd['enstag'] = enstag
144    else:
145        pd['enstag'] = ename
146    pd['nr'] = '%d' % (nr)
147    pd['array'] = []
148    osymbol = 'cfg'
149    if not isinstance(symbol, list):
150        raise Exception('Symbol has to be a list!')
151    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
152        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
153    for s in symbol:
154        osymbol += ' %s' % s
155    for r in range(nr):
156        ad = {}
157        ad['id'] = onames[r]
158        Nconf = len(obsl[0].deltas[names[r]])
159        layout = '%d i f%d' % (Nconf, len(obsl))
160        ad['layout'] = layout
161        ad['symbol'] = osymbol
162        data = ''
163        for c in range(Nconf):
164            data += '%d ' % obsl[0].idl[names[r]][c]
165            for o in obsl:
166                num = o.deltas[names[r]][c] + o.r_values[names[r]]
167                if num == 0:
168                    data += '0 '
169                else:
170                    data += '%1.16e ' % (num)
171            data += '\n'
172        ad['#data'] = data
173        pd['array'].append(ad)
174
175    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
176    return rs
177
178
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True and it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
224
225
def _import_data(string):
    """Parse a whitespace-separated block of numbers into a flat list.

    A '+' that directly follows a space is dropped before parsing,
    the remaining tokens are joined by commas and decoded as a JSON array.
    """
    tokens = string.replace(' +', ' ').split()
    return json.loads("[%s]" % ",".join(tokens))
228
229
def _check(condition):
    """Raise an Exception if `condition` is falsy, otherwise do nothing."""
    if condition:
        return
    raise Exception("XML file format not supported")
233
234
class _NoTagInDataError(Exception):
    """Raised when a requested tag is not present in the data.

    The missing tag is kept in the attribute `tag`.
    """

    def __init__(self, tag):
        message = 'Tag %s not in data!' % (tag)
        self.tag = tag
        super().__init__(message)
240
241
def _find_tag(dat, tag):
    """Return the position of the first entry of `dat` whose `tag` attribute equals `tag`.

    Raises
    ------
    _NoTagInDataError
        If no entry carries the requested tag.
    """
    for position, element in enumerate(dat):
        if element.tag == tag:
            return position
    raise _NoTagInDataError(tag)
247
248
def _import_array(arr):
    """Decode an <array> element according to its layout string.

    The numbers are read from the text that follows the 'layout' element or,
    if a 'symbol' element comes after it, the text that follows 'symbol'.

    Returns
    -------
    For a layout '<nc> i f<na>' (index column followed by na data columns):
        list [name, indices, mask, data] where data holds one numpy array per column.
    For a layout '<n> f<m>':
        numpy array of shape (n, m).
    For a layout that lists the dimensions followed by 'f':
        numpy array with the listed dimensions.
    Any other layout is reported on stdout and rejected via _check.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    layout_pos = _find_tag(arr, 'layout')
    try:
        symbol_pos = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        symbol_pos = 0
    # The payload is the tail of whichever header element comes last.
    tmp = _import_data(arr[max(symbol_pos, layout_pos)].tail)

    m = arr[layout_pos].text.strip().split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        stride = na + 1
        mask = list(range(na))
        _dat = [np.array(tmp[1 + a:: stride]) for a in range(na)]
        indices = tmp[0:: stride]
        _check(len(indices) == nc)
        return [name, indices, mask, _dat]
    if m[1][0] == 'f' and len(m) < 3:
        return np.reshape(tmp, (int(m[0]), int(m[1].lstrip('f'))))
    if any(['f' in s for s in m]):
        # Position of the first bare 'f'; falls back to the last entry if there is none.
        si = next((pos for pos, entry in enumerate(m) if entry == 'f'), len(m) - 1)
        return np.reshape(tmp, [int(entry) for entry in m[:si]])
    print(name, m)
    _check(False)
285
286
def _import_rdata(rd):
    """Decode a replica array and return (deltas, name, idx); the column mask is dropped."""
    rep_name, rep_idx, _mask, rep_deltas = _import_array(rd)
    return rep_deltas, rep_name, rep_idx
290
291
def _import_cdata(cd):
    """Decode a <cdata> element.

    The first child has to be the 'id', the second the array with id 'cov',
    the third is read as the gradient array.

    Returns
    -------
    tuple (name, cov, grad)
    """
    id_node = cd[0]
    _check(id_node.tag == "id")
    cov_node = cd[1]
    _check(cov_node[0].text.strip() == "cov")
    cov, grad = _import_array(cov_node), _import_array(cd[2])
    return id_node.text.strip(), cov, grad
298
299
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True and it is missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes, exactly like the gzip branch: the parser refuses str input that
        # carries an XML encoding declaration (which write_pobs always emits), and
        # text mode would decode with the locale encoding instead of the declared one.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Children 0-4 of <pobs> are the header entries, the replica arrays start at 5.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[replica][observable] -> one Obs per observable built from all replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res
397
398
399# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the value array carries a 'symbol' entry with exactly one whitespace-separated
    token per observable, the tokens are assigned as tags of the returned Obs (the
    token 'None' is mapped to None). Otherwise no tags are assigned.

    Parameters
    ----------
    content : str
        XML string containing the data
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be handled before the int case,
                # since bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations around the mean; an exact zero marks a configuration
    # on which the observable is not defined and is kept as zero.
    for name in names:
        for i in range(len(deltad[name])):
            tmp = np.zeros_like(deltad[name][i])
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    tmp[j] = deltad[name][i][j] + mean[i]
            deltad[name][i] = tmp

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # Skip replica on which the observable is constant and equal to its mean.
            if len(h) == 1 and np.all(h == mean[i]):
                continue
            repdeltas = []
            repidl = []
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    repdeltas.append(deltad[name][i][j])
                    repidl.append(idld[name][j])
            if len(repdeltas) > 0:
                obs_names.append(name)
                deltas.append(repdeltas)
                idl.append(repidl)

        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        # Drop covobs on which this observable does not depend.
        for name in cnames:
            if np.all(new_covobs[name].grad == 0):
                del new_covobs[name]
        cnames_loc = list(new_covobs.keys())
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # symbol is one whitespace-separated string; indexing it directly would
        # hand out single characters instead of the per-observable entries.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
572
573
def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is handed to import_dobs_string, which does the actual decoding.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True and it is missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes, exactly like the gzip branch: the parser refuses str input that
        # carries an XML encoding declaration (which write_dobs always emits), and
        # text mode would decode with the locale encoding instead of the declared one.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, full_output, separator_insertion=separator_insertion)
619
620
def _dobsdict_to_xmlstring(d):
    """Serialize a nested dict describing a dobs file to an XML string without indentation.

    Keys are processed in insertion order:
    - a key starting with '#value' ends the processing: everything collected so far
      plus the concatenated content of its value and a newline is returned,
    - any other key starting with '#' ends the processing as well, the result is
      wrapped in <array> ... </array>,
    - dict values are serialized recursively and enclosed in <key> tags,
    - str values are enclosed in <key> tags (on separate lines if longer than 100 characters),
    - list values are serialized element by element; for the keys 'edata' and 'cdata'
      every element is enclosed in its own <key> tags,
    - int and float values are written via str() inside <key> tags,
    - any other falsy value makes the function return a single newline.

    Raises
    ------
    Exception
        If `d` is not a dict or a value has a type that is not supported.
    """
    if not isinstance(d, dict):
        raise Exception('Type', type(d), 'not supported in export!')

    chunks = []
    for key, value in d.items():
        if key.startswith('#value'):
            chunks.extend(value)
            return ''.join(chunks) + '\n'
        if key.startswith('#'):
            chunks.extend(value)
            return '<array>\n' + ''.join(chunks) + '</array>\n'
        if isinstance(value, dict):
            chunks.append('<%s>\n' % key + _dobsdict_to_xmlstring(value) + '</%s>\n' % key)
        elif isinstance(value, str):
            if len(value) > 100:
                chunks.append('<%s>\n ' % key + value + ' \n</%s>\n' % key)
            else:
                chunks.append('<%s> ' % key + value + ' </%s>\n' % key)
        elif isinstance(value, list):
            if key in ['edata', 'cdata']:
                chunks.extend(('<%s>\n' % key + _dobsdict_to_xmlstring(item) + '</%s>\n' % key)
                              for item in value)
            else:
                chunks.extend(_dobsdict_to_xmlstring(item) for item in value)
        elif isinstance(value, (int, float)):
            chunks.append('<%s> ' % key + str(value) + ' </%s>\n' % key)
        elif not value:
            return '\n'
        else:
            raise Exception('Type', type(value), 'not supported in export!')
    return ''.join(chunks)
659
660
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize `d` with _dobsdict_to_xmlstring and indent the result line by line.

    A line starting with an opening tag that does not contain a closing tag
    increases the indentation of the following lines by one `space`; a line
    starting with a closing tag decreases it before the line is written.
    """
    depth = 0
    indented = []
    for line in _dobsdict_to_xmlstring(d).split('\n'):
        closes = line.startswith('</')
        if closes:
            depth -= 1
        indented.append(space * depth + line + '\n')
        if line.startswith('<') and not closes and '</' not in line:
            depth += 1
    return ''.join(indented)
678
679
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data
    """
    # Work on a copy: the defaults filled in below must not leak into the caller's dict.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rname for rname in o.names if rname.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)

    # One <edata> entry per Monte Carlo ensemble, holding one array per replica.
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            # counters[oi] is the position of the next unwritten configuration of
            # obsl[oi] on this replica, -1 once all of them have been written.
            counters = [0 for o in obsl]
            # Written values are fluctuations around the global mean of each Obs.
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            rows = []
            for ci in idx:
                row = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    if repname in o.idl and counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]] + offsets[oi]
                        row.append('0 ' if num == 0 else '%1.16e ' % (num))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Obs is not defined on this configuration: padded with zero.
                        row.append('0 ')
                row.append('\n')
                rows.append(''.join(row))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance section does not depend on the Monte Carlo ensembles. It is built
    # once, after the ensemble loop, so that it is also written if there is no ensemble.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i].item()
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
867
868
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither
        in '.xml' nor in '.gz'; '.gz' is appended if `gz` is True and it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.

    Returns
    -------
    None
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 90    """Export a list of Obs or structures containing Obs to an xml string
 91    according to the Zeuthen pobs format.
 92
 93    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 94
 95    Parameters
 96    ----------
 97    obsl : list
 98        List of Obs that will be exported.
 99        The Obs inside a structure have to be defined on the same ensemble.
100    name : str
101        The name of the observable.
102    spec : str
103        Optional string that describes the contents of the file.
104    origin : str
105        Specify where the data has its origin.
106    symbol : list
107        A list of symbols that describe the observables to be written. May be empty.
108    enstag : str
109        Enstag that is written to pobs. If None, the ensemble name is used.
110
111    Returns
112    -------
113    xml_str : str
114        XML formatted string of the input data
115    """
116
117    od = {}
118    ename = obsl[0].e_names[0]
119    names = list(obsl[0].deltas.keys())
120    nr = len(names)
121    onames = [name.replace('|', '') for name in names]
122    for o in obsl:
123        if len(o.e_names) != 1:
124            raise Exception('You try to export dobs to obs!')
125        if o.e_names[0] != ename:
126            raise Exception('You try to export dobs to obs!')
127        if len(o.deltas.keys()) != nr:
128            raise Exception('Incompatible obses in list')
129    od['observables'] = {}
130    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
131    od['observables']['origin'] = {
132        'who': getpass.getuser(),
133        'date': str(datetime.datetime.now())[:-7],
134        'host': socket.gethostname(),
135        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
136    od['observables']['pobs'] = {}
137    pd = od['observables']['pobs']
138    pd['spec'] = spec
139    pd['origin'] = origin
140    pd['name'] = name
141    if enstag:
142        if not isinstance(enstag, str):
143            raise Exception('enstag has to be a string!')
144        pd['enstag'] = enstag
145    else:
146        pd['enstag'] = ename
147    pd['nr'] = '%d' % (nr)
148    pd['array'] = []
149    osymbol = 'cfg'
150    if not isinstance(symbol, list):
151        raise Exception('Symbol has to be a list!')
152    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
153        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
154    for s in symbol:
155        osymbol += ' %s' % s
156    for r in range(nr):
157        ad = {}
158        ad['id'] = onames[r]
159        Nconf = len(obsl[0].deltas[names[r]])
160        layout = '%d i f%d' % (Nconf, len(obsl))
161        ad['layout'] = layout
162        ad['symbol'] = osymbol
163        data = ''
164        for c in range(Nconf):
165            data += '%d ' % obsl[0].idl[names[r]][c]
166            for o in obsl:
167                num = o.deltas[names[r]][c] + o.r_values[names[r]]
168                if num == 0:
169                    data += '0 '
170                else:
171                    data += '%1.16e ' % (num)
172            data += '\n'
173        ad['#data'] = data
174        pd['array'].append(ad)
175
176    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
177    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
Returns
  • xml_str (str): XML formatted string of the input data
def write_pobs( obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither
        with '.xml' nor with '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
Returns
  • None
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, as in the gzip branch: the XML parser then applies the
        # encoding declared in the file instead of the locale default, and the
        # declaration itself is accepted by the parser.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # The first five children of pobs are description fields and 'nr' (checked
    # below); every further child holds the data of one replica.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per observable, each collecting its samples from all replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def import_dobs_string(content, full_output=False, separator_insertion=True):
def import_dobs_string(content, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically. If the data contains a
    symbol entry with exactly one whitespace-separated token per observable,
    these tokens are assigned as tags of the returned Obs (the token 'None'
    is mapped to None).

    Parameters
    ----------
    content : str
        XML string containing the data
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The booleans have to be handled before the int branch,
                # since bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # Map the ensemble name implied by the replica name to the enstag of the file.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # Add the mean to every non-zero entry; zeros are kept and are skipped
    # when the samples of each observable are collected below.
    for name in names:
        for i in range(len(deltad[name])):
            tmp = np.zeros_like(deltad[name][i])
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    tmp[j] = deltad[name][i][j] + mean[i]
            deltad[name][i] = tmp

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]):
                continue
            repdeltas = []
            repidl = []
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    repdeltas.append(deltad[name][i][j])
                    repidl.append(idld[name][j])
            if len(repdeltas) > 0:
                obs_names.append(name)
                deltas.append(repdeltas)
                idl.append(repidl)

        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        # Drop covobs whose gradient vanishes for this observable.
        for name in cnames:
            if np.all(new_covobs[name].grad == 0):
                del new_covobs[name]
        cnames_loc = list(new_covobs.keys())
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # symbol is a single string; split it into one token per observable
        # instead of indexing its characters. If the number of tokens does not
        # match the number of observables, no tags are assigned.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, as in the gzip branch: the XML parser then applies the
        # encoding declared in the file instead of the locale default, and the
        # declaration itself is accepted by the parser.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
  • res (list[Obs]): Imported data
  • or
  • res (dict): Imported data and meta-data
def create_dobs_string( obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data

    Raises
    ------
    Exception
        If symbol is not a list of length 0 or len(obsl), or if two Obs carry
        different covariance matrices under the same name.
    """
    # Work on a copy: missing entries are filled in below and the caller's
    # dict must not be changed by that.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ensname in mc_names:
        ed = {}
        ed['enstag'] = enstags[ensname]
        onames = sorted([n for n in r_names if (n.startswith(ensname + '|') or n == ensname)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            rows = []
            # counters[oi] is the position of the next unwritten sample of Obs oi
            # on this replica; -1 marks that all its samples have been written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            for ci in idx:
                row = '%d ' % ci
                for oi, o in enumerate(obsl):
                    # A plain 0 is written wherever the Obs has no sample on configuration ci.
                    if repname not in o.idl or counters[oi] < 0 or o.idl[repname][counters[oi]] != ci:
                        row += '0 '
                        continue
                    num = o.deltas[repname][counters[oi]] + offsets[oi]
                    if num == 0:
                        row += '0 '
                    else:
                        row += '%1.16e ' % (num)
                    counters[oi] += 1
                    if counters[oi] >= len(o.idl[repname]):
                        counters[oi] = -1
                rows.append(row + '\n')
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the ensembles, so it is assembled
    # once here; this also covers input without any Monte Carlo ensemble.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i].item()
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
Returns
  • xml_str (str): XML string generated from the data
def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. If it ends with neither '.xml' nor '.gz',
        '.xml' is appended. If gz is True and the name does not already end
        with '.gz', '.gz' is appended as well.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}.
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.

    Returns
    -------
    None
    """
    # NOTE: `symbol` keeps its list default for interface compatibility; this
    # function only forwards it to create_dobs_string and never modifies it.
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    # Normalise the file name suffix before opening anything.
    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        # Context managers guarantee the handle is closed (and the gzip
        # trailer flushed) even if the write raises.
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag}. Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.
Returns
  • None