pyerrors.input.dobs

View Source
  0from collections import defaultdict
  1import gzip
  2import lxml.etree as et
  3import getpass
  4import socket
  5import datetime
  6import json
  7import warnings
  8import numpy as np
  9from ..obs import Obs
 10from ..obs import _merge_idx
 11from ..covobs import Covobs
 12from .. import version as pyerrorsversion
 13
 14
 15# Based on https://stackoverflow.com/a/10076823
 16def _etree_to_dict(t):
 17    """ Convert the content of an XML file to a python dict"""
 18    d = {t.tag: {} if t.attrib else None}
 19    children = list(t)
 20    if children:
 21        dd = defaultdict(list)
 22        for dc in map(_etree_to_dict, children):
 23            for k, v in dc.items():
 24                dd[k].append(v)
 25        d = {t.tag: {k: v[0] if len(v) == 1 else v
 26                     for k, v in dd.items()}}
 27    if t.attrib:
 28        d[t.tag].update(('@' + k, v)
 29                        for k, v in t.attrib.items())
 30    if t.text:
 31        text = t.text.strip()
 32        if children or t.attrib:
 33            if text:
 34                d[t.tag]['#data'] = [text]
 35        else:
 36            d[t.tag] = text
 37    return d
 38
 39
 40def _dict_to_xmlstring(d):
 41    if isinstance(d, dict):
 42        iters = ''
 43        for k in d:
 44            if k.startswith('#'):
 45                for la in d[k]:
 46                    iters += la
 47                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
 48                return iters
 49            if isinstance(d[k], dict):
 50                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
 51            elif isinstance(d[k], str):
 52                if len(d[k]) > 100:
 53                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
 54                else:
 55                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
 56            elif isinstance(d[k], list):
 57                for i in range(len(d[k])):
 58                    iters += _dict_to_xmlstring(d[k][i])
 59            elif not d[k]:
 60                return '\n'
 61            else:
 62                raise Exception('Type', type(d[k]), 'not supported in export!')
 63    else:
 64        raise Exception('Type', type(d), 'not supported in export!')
 65    return iters
 66
 67
 68def _dict_to_xmlstring_spaces(d, space='  '):
 69    s = _dict_to_xmlstring(d)
 70    o = ''
 71    c = 0
 72    cm = False
 73    for li in s.split('\n'):
 74        if li.startswith('<%s' % ('/')):
 75            c -= 1
 76            cm = True
 77        for i in range(c):
 78            o += space
 79        o += li + '\n'
 80        if li.startswith('<') and not cm:
 81            if not '<%s' % ('/') in li:
 82                c += 1
 83        cm = False
 84    return o
 85
 86
 87def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 88    """Export a list of Obs or structures containing Obs to an xml string
 89    according to the Zeuthen pobs format.
 90
 91    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 92
 93    Parameters
 94    ----------
 95    obsl : list
 96        List of Obs that will be exported.
 97        The Obs inside a structure have to be defined on the same ensemble.
 98    name : str
 99        The name of the observable.
100    spec : str
101        Optional string that describes the contents of the file.
102    origin : str
103        Specify where the data has its origin.
104    symbol : list
105        A list of symbols that describe the observables to be written. May be empty.
106    enstag : str
107        Enstag that is written to pobs. If None, the ensemble name is used.
108    """
109
110    od = {}
111    ename = obsl[0].e_names[0]
112    names = list(obsl[0].deltas.keys())
113    nr = len(names)
114    onames = [name.replace('|', '') for name in names]
115    for o in obsl:
116        if len(o.e_names) != 1:
117            raise Exception('You try to export dobs to obs!')
118        if o.e_names[0] != ename:
119            raise Exception('You try to export dobs to obs!')
120        if len(o.deltas.keys()) != nr:
121            raise Exception('Incompatible obses in list')
122    od['observables'] = {}
123    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
124    od['observables']['origin'] = {
125        'who': getpass.getuser(),
126        'date': str(datetime.datetime.now())[:-7],
127        'host': socket.gethostname(),
128        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
129    od['observables']['pobs'] = {}
130    pd = od['observables']['pobs']
131    pd['spec'] = spec
132    pd['origin'] = origin
133    pd['name'] = name
134    if enstag:
135        if not isinstance(enstag, str):
136            raise Exception('enstag has to be a string!')
137        pd['enstag'] = enstag
138    else:
139        pd['enstag'] = ename
140    pd['nr'] = '%d' % (nr)
141    pd['array'] = []
142    osymbol = 'cfg'
143    if not isinstance(symbol, list):
144        raise Exception('Symbol has to be a list!')
145    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
146        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
147    for s in symbol:
148        osymbol += ' %s' % s
149    for r in range(nr):
150        ad = {}
151        ad['id'] = onames[r]
152        Nconf = len(obsl[0].deltas[names[r]])
153        layout = '%d i f%d' % (Nconf, len(obsl))
154        ad['layout'] = layout
155        ad['symbol'] = osymbol
156        data = ''
157        for c in range(Nconf):
158            data += '%d ' % obsl[0].idl[names[r]][c]
159            for o in obsl:
160                num = o.deltas[names[r]][c] + o.r_values[names[r]]
161                if num == 0:
162                    data += '0 '
163                else:
164                    data += '%1.16e ' % (num)
165            data += '\n'
166        ad['#data'] = data
167        pd['array'].append(ad)
168
169    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
170    return rs
171
172
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. The extension '.xml' is appended if the
        name ends neither with '.xml' nor with '.gz'; for gz=True, '.gz' is
        appended if missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
214
215
def _import_data(string):
    """Parse a whitespace separated sequence of numbers into a flat list.

    An explicit plus sign that follows a blank is dropped; the remaining
    tokens are joined into a JSON array and decoded with the json module.
    """
    tokens = string.replace(' +', ' ').split()
    return json.loads("[" + ",".join(tokens) + "]")
218
219
def _check(condition):
    """Raise an Exception unless ``condition`` is truthy."""
    if condition:
        return
    raise Exception("XML file format not supported")
223
224
class _NoTagInDataError(Exception):
    """Signals that a requested tag is missing from the data.

    The missing tag is available as the attribute ``tag``.
    """

    def __init__(self, tag):
        self.tag = tag
        message = 'Tag %s not in data!' % (self.tag)
        super().__init__(message)
230
231
def _find_tag(dat, tag):
    """Return the index of the first entry of ``dat`` whose tag equals ``tag``.

    Raises
    ------
    _NoTagInDataError
        If no entry carries the requested tag.
    """
    for position, entry in enumerate(dat):
        if entry.tag == tag:
            return position
    raise _NoTagInDataError(tag)
237
238
def _import_array(arr):
    """Parse one ``array`` element.

    The numbers are read from the tail text of the ``layout`` child or, if
    a ``symbol`` child follows it, from the tail of that child. The tokens
    of the layout string decide how they are interpreted:

    * ``'<nc> i f<na>'``: rows of one index followed by ``na`` values.
      Returns ``[name, indices, mask, columns]`` where ``mask`` is
      ``[0, ..., na - 1]`` and ``columns`` is a list of ``na`` numpy arrays.
      The number of rows has to equal ``nc``.
    * ``'<n> f<m>'``: returns the data reshaped to ``(n, m)``.
    * layouts containing a token with an ``f``: the data is reshaped to the
      integer tokens that precede the first token equal to ``'f'``.

    Raises
    ------
    Exception
        If the layout is not recognized or the number of rows is inconsistent.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    layout_pos = _find_tag(arr, 'layout')
    try:
        symbol_pos = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        symbol_pos = 0

    # The data follows whichever of the two children comes last.
    data_pos = symbol_pos if symbol_pos > layout_pos else layout_pos
    tmp = _import_data(arr[data_pos].tail)

    m = arr[layout_pos].text.strip().split()

    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        stride = na + 1
        columns = [np.array(tmp[1 + a:: stride]) for a in range(na)]
        indices = tmp[0:: stride]
        _check(len(indices) == nc)
        return [name, indices, list(range(na)), columns]

    if m[1][0] == 'f' and len(m) < 3:
        return np.reshape(tmp, (int(m[0]), int(m[1].lstrip('f'))))

    if any(['f' in token for token in m]):
        # Without a bare 'f' token all but the last token define the shape.
        stop = m.index('f') if 'f' in m else len(m) - 1
        return np.reshape(tmp, [int(token) for token in m[:stop]])

    print(name, m)
    _check(False)
275
276
def _import_rdata(rd):
    """Parse the array of one replica and return ``(deltas, name, idx)``."""
    parsed = _import_array(rd)
    name, idx, _mask, deltas = parsed
    return deltas, name, idx
280
281
def _import_cdata(cd):
    """Parse a ``cdata`` element and return ``(name, cov, grad)``.

    The first child has to be the ``id``, the second child an array whose
    first child has the text ``cov``; the third child is parsed as the
    array of gradients.
    """
    id_node = cd[0]
    _check(id_node.tag == "id")
    cov_node = cd[1]
    _check(cov_node[0].text.strip() == "cov")
    covariance = _import_array(cov_node)
    gradient = _import_array(cd[2])
    return id_node.text.strip(), covariance, gradient
288
289
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. The extension '.xml' is appended if the
        name ends neither with '.xml' nor with '.gz'; for gz=True, '.gz' is
        appended if missing.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or for full_output=True a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'.

    Raises
    ------
    Exception
        If the structure of the file is not supported or if
        separator_insertion has an invalid type.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes, exactly as in the gzip branch: the parser then decodes
        # the document according to its own xml declaration instead of the
        # platform default encoding, and lxml does not accept str input that
        # carries an encoding declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Validate the expected layout (four descriptive entries, nr, then the
    # replica arrays) before the data is parsed, such that malformed files
    # are reported as unsupported instead of failing with an obscure error.
    _check(len(pobs) > 5)
    _check(pobs[4].tag == "nr")
    _check(pobs[5].tag == 'array')

    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas[r][i] holds the data of observable i on replica r.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        # An element without any text has text None.
        descriptiond[pobs[i].tag] = (pobs[i].text or '').strip()

    # NOTE(review): a symbol is only picked up if it is the second child of
    # the first array; create_pobs_string writes it as the third child
    # (after the layout) - confirm whether this is intended.
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['obsdata'] = res
    return retd
379
380
381# Reading (and writing) dobs is not yet working properly:
382# we have to loop over root[2:] because each entry is a dobs
383# But maybe this is just a problem with Ben's implementation
384
385# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the file provides exactly one symbol per observable, the symbols are
    set as tags of the returned Obs (the symbol 'None' yields the tag None).

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or for full_output=True a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.

    Raises
    ------
    Exception
        If the structure of the file is not supported or if
        separator_insertion has an invalid type.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    # NOTE(review): a symbol is only picked up if it is the second child of
    # the array; create_dobs_string writes it as the third child (after the
    # layout) - confirm whether this is intended.
    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be handled before the int case,
                # because bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates; sorting makes the order of the replica reproducible.
    names = sorted(set(names))

    # The file stores fluctuations; add the mean to obtain the samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    # The symbols are stored as one whitespace separated string. Tags are
    # only assigned if there is exactly one symbol per observable.
    tags = symbol.split()
    if tags and len(tags) == len(res):
        for obs, tag in zip(res, tags):
            obs.tag = None if tag == 'None' else tag

    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['enstags'] = enstags
    retd['obsdata'] = res
    return retd
547
548
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The content of the file is passed to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file. The extension '.xml' is appended if the
        name ends neither with '.xml' nor with '.gz'; for gz=True, '.gz' is
        appended if missing.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # The content is read exactly once and as bytes in both branches: the
    # xml parser decodes it according to the xml declaration of the document.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
593
594
def _dobsdict_to_xmlstring(d):
    """Serialize a nested dict into an (unindented) XML string for dobs files.

    Keys are processed in insertion order:

    * a key starting with ``#value`` ends the processing: its entries are
      appended to what has been generated so far, followed by a newline,
    * any other key starting with ``#`` ends the processing as well, with the
      result wrapped in ``<array>`` ... ``</array>``,
    * dict values are written recursively between an opening and a closing tag,
    * str values are written inline, or on separate lines if longer than
      100 characters,
    * the entries of a list are serialized one after the other; for the keys
      ``edata`` and ``cdata`` every entry is enclosed in its own pair of tags,
    * int and float values are written inline,
    * any other falsy value makes the function return a single newline.

    Raises
    ------
    Exception
        If ``d`` is not a dict or contains a value of an unsupported type.
    """
    if not isinstance(d, dict):
        raise Exception('Type', type(d), 'not supported in export!')

    parts = []
    for key, value in d.items():
        if key.startswith('#value'):
            parts.extend(value)
            return ''.join(parts) + '\n'
        if key.startswith('#'):
            parts.extend(value)
            return '<array>\n' + ''.join(parts) + '</array>\n'

        open_tag = '<%s>' % (key)
        close_tag = '</%s>\n' % (key)
        if isinstance(value, dict):
            parts.append(open_tag + '\n' + _dobsdict_to_xmlstring(value) + close_tag)
        elif isinstance(value, str):
            if len(value) > 100:
                parts.append(open_tag + '\n ' + value + ' \n' + close_tag)
            else:
                parts.append(open_tag + ' ' + value + ' ' + close_tag)
        elif isinstance(value, list):
            enclose = key in ['edata', 'cdata']
            for entry in value:
                inner = _dobsdict_to_xmlstring(entry)
                if enclose:
                    inner = open_tag + '\n' + inner + close_tag
                parts.append(inner)
        elif isinstance(value, (int, float)):
            parts.append(open_tag + ' ' + str(value) + ' ' + close_tag)
        elif not value:
            return '\n'
        else:
            raise Exception('Type', type(value), 'not supported in export!')
    return ''.join(parts)
633
634
def _dobsdict_to_xmlstring_spaces(d, space='  '):
    """Serialize ``d`` with _dobsdict_to_xmlstring and indent the result.

    Every line is prefixed with ``space`` repeated according to the current
    nesting depth. A line starting with a closing tag reduces the depth
    before it is written; a line that starts with ``<`` and contains no
    closing tag increases the depth for the following lines.
    """
    depth = 0
    indented = []
    for line in _dobsdict_to_xmlstring(d).split('\n'):
        closes = line.startswith('</')
        if closes:
            depth -= 1
        indented.append(space * depth + line + '\n')
        # Only a pure opening tag (no closing tag on the same line) nests deeper.
        if not closes and line.startswith('<') and '</' not in line:
            depth += 1
    return ''.join(indented)
652
653
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The xml document, including the xml declaration.

    Raises
    ------
    Exception
        If symbol is invalid or if the Obs carry inconsistent covariance
        matrices for the same name.
    """
    # Work on a copy: the missing entries are filled in below and neither the
    # dict of the caller nor the shared default argument may be altered.
    enstags = dict(enstags) if enstags else {}

    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for ens in mc_names:
        enstags.setdefault(ens, ens)
    ne = len(mc_names)
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if len(symbol) != len(obsl):
            raise Exception('Symbol has to be a list of length 0 or %d!' % (len(obsl)))
        pd['array']['symbol'] = ' '.join('%s' % s for s in symbol)

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)

    # One edata entry per Monte Carlo ensemble, with one array per replica.
    pd['edata'] = []
    for ens in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens]
        onames = sorted([n for n in r_names if (n.startswith(ens + '|') or n == ens)])
        ed['nr'] = len(onames)
        # The arrays are written without a surrounding tag.
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            ad['layout'] = '%d i f%d' % (len(idx), len(obsl))
            # counters[oi] is the position of the next unwritten configuration
            # of observable oi; -1 marks that all of them have been written.
            counters = [0] * len(obsl)
            rows = []
            for ci in idx:
                cells = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    pos = counters[oi]
                    if repname not in o.idl or pos < 0 or o.idl[repname][pos] != ci:
                        # The observable has no data on this configuration.
                        cells.append('0 ')
                        continue
                    num = o.deltas[repname][pos]
                    cells.append('0 ' if num == 0 else '%1.16e ' % (num))
                    counters[oi] = pos + 1 if pos + 1 < len(o.idl[repname]) else -1
                cells.append('\n')
                rows.append(''.join(cells))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the Monte Carlo ensembles and is
    # therefore assembled once, after the loop over the ensembles. This way it
    # is also written if the Obs do not contain any Monte Carlo data.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
825
826
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. The extension '.xml' is appended if the
        name ends neither with '.xml' nor with '.gz'; for gz=True, '.gz' is
        appended if missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """

    # Pass a private copy so that neither the dict of the caller nor the
    # shared default argument can be modified downstream.
    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who,
                                    enstags=dict(enstags) if enstags else {})

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
#   def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
View Source
 88def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
 89    """Export a list of Obs or structures containing Obs to an xml string
 90    according to the Zeuthen pobs format.
 91
 92    Tags are not written or recovered automatically. The separator | is removed from the replica names.
 93
 94    Parameters
 95    ----------
 96    obsl : list
 97        List of Obs that will be exported.
 98        The Obs inside a structure have to be defined on the same ensemble.
 99    name : str
100        The name of the observable.
101    spec : str
102        Optional string that describes the contents of the file.
103    origin : str
104        Specify where the data has its origin.
105    symbol : list
106        A list of symbols that describe the observables to be written. May be empty.
107    enstag : str
108        Enstag that is written to pobs. If None, the ensemble name is used.
109    """
110
111    od = {}
112    ename = obsl[0].e_names[0]
113    names = list(obsl[0].deltas.keys())
114    nr = len(names)
115    onames = [name.replace('|', '') for name in names]
116    for o in obsl:
117        if len(o.e_names) != 1:
118            raise Exception('You try to export dobs to obs!')
119        if o.e_names[0] != ename:
120            raise Exception('You try to export dobs to obs!')
121        if len(o.deltas.keys()) != nr:
122            raise Exception('Incompatible obses in list')
123    od['observables'] = {}
124    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
125    od['observables']['origin'] = {
126        'who': getpass.getuser(),
127        'date': str(datetime.datetime.now())[:-7],
128        'host': socket.gethostname(),
129        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
130    od['observables']['pobs'] = {}
131    pd = od['observables']['pobs']
132    pd['spec'] = spec
133    pd['origin'] = origin
134    pd['name'] = name
135    if enstag:
136        if not isinstance(enstag, str):
137            raise Exception('enstag has to be a string!')
138        pd['enstag'] = enstag
139    else:
140        pd['enstag'] = ename
141    pd['nr'] = '%d' % (nr)
142    pd['array'] = []
143    osymbol = 'cfg'
144    if not isinstance(symbol, list):
145        raise Exception('Symbol has to be a list!')
146    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
147        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
148    for s in symbol:
149        osymbol += ' %s' % s
150    for r in range(nr):
151        ad = {}
152        ad['id'] = onames[r]
153        Nconf = len(obsl[0].deltas[names[r]])
154        layout = '%d i f%d' % (Nconf, len(obsl))
155        ad['layout'] = layout
156        ad['symbol'] = osymbol
157        data = ''
158        for c in range(Nconf):
159            data += '%d ' % obsl[0].idl[names[r]][c]
160            for o in obsl:
161                num = o.deltas[names[r]][c] + o.r_values[names[r]]
162                if num == 0:
163                    data += '0 '
164                else:
165                    data += '%1.16e ' % (num)
166            data += '\n'
167        ad['#data'] = data
168        pd['array'].append(ad)
169
170    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
171    return rs

Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
#   def write_pobs( obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True ):
View Source
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither in
        '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
#   def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
View Source
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither in
        '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, as in the gzip branch: the XML parser then applies the
        # encoding declared in the file instead of the platform default, and lxml does
        # not accept str input that carries an encoding declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # The first five children of pobs are metadata (four description fields and
    # 'nr', checked below); every further child holds the data of one replica.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # deltas is indexed [replica][observable]; build one Obs per observable.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
#   def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
View Source
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written automatically. If the file contains a symbol entry, its
    whitespace separated tokens are assigned as tags to the returned Obs.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs, or, if full_output is True, a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.
    """

    # lxml does not accept str input that carries an XML encoding declaration (as
    # written by create_dobs_string), so hand it UTF-8 encoded bytes instead.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly and before the int branch:
                # bool is a subclass of int, so False would otherwise insert the
                # separator at position 0.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # Remember which enstag from the file belongs to the ensemble name
                # that results after the separator was inserted.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates but keep the order of the file, so that the result does not
    # depend on the arbitrary iteration order of a set.
    names = list(dict.fromkeys(names))

    # The file stores fluctuations around the mean; add the mean back.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose entries all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # symbol is a single string with one whitespace separated token per
        # observable; indexing the string itself would yield single characters.
        for o, tag in zip(res, symbol.split()):
            o.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res

Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • content (str): XML string containing the data
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
#   def read_dobs( fname, noempty=False, full_output=False, gz=True, separator_insertion=True ):
View Source
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends neither in
        '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the content of the file.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read exactly once and as raw bytes in both branches, so that the
    # XML parser applies the encoding declared in the file. Previously the file was
    # unconditionally re-opened with gzip afterwards, which failed for gz=False and
    # left the file handle open.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)

Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.

Parameters
  • fname (str): Filename of the input file.
  • noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
  • full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
  • gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
  • separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
#   def create_dobs_string( obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={} ):
View Source
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If symbol has the wrong type or length or if two Obs carry different
        covariance matrices under the same name.
    """
    # Work on a copy: missing entries are filled in below, which must not leak into
    # the caller's dict or into the shared default argument.
    enstags = dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # Drop the microseconds explicitly. Slicing off the last 7 characters of
        # str(now) cuts into the time whenever the microsecond field is exactly 0,
        # because str() omits the fractional part in that case.
        'date': str(datetime.datetime.now().replace(microsecond=0)),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for r in range(nr):
            ad = {}
            repname = onames[r]
            ad['id'] = repname.replace('|', '')
            # Union of the configurations of all Obs on this replica.
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            layout = '%d i f%d' % (Nconf, len(obsl))
            ad['layout'] = layout
            parts = []
            # counters[oi] is the position of the next unwritten configuration of
            # obsl[oi] on this replica; -1 marks that all of them were written.
            counters = [0 for o in obsl]
            for ci in idx:
                parts.append('%d ' % ci)
                for oi in range(len(obsl)):
                    o = obsl[oi]
                    if repname in o.idl:
                        if counters[oi] < 0:
                            parts.append('0 ')
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]]
                            if num == 0:
                                parts.append('0 ')
                            else:
                                parts.append('%1.16e ' % (num))
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            # This Obs is not defined on configuration ci.
                            parts.append('0 ')
                    else:
                        parts.append('0 ')
                parts.append('\n')
            ad['#data'] = ''.join(parts)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # The covariance data does not depend on the ensembles. It is therefore assembled
    # once, after the ensemble loop, and also when there is no ensemble at all.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            parts = []
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        parts.append('0 ')
                    else:
                        parts.append('%1.14e ' % (val))
                parts.append('\n')
            covd['#data'] = ''.join(parts)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        parts = []
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        parts.append('%1.14e ' % (val))
                    else:
                        parts.append('0 ')
                else:
                    # Obs without this covobs get a vanishing gradient.
                    parts.append('0 ')
        gradd['#data'] = ''.join(parts)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs

Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
#   def write_dobs( obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True ):
View Source
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags={}, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends neither in
        '.xml' nor in '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.
    """

    # Pass a copy: create_dobs_string fills in missing enstags in place, which would
    # otherwise alter the caller's dict or the shared default argument.
    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=dict(enstags))

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # Context managers make sure the file is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)

Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.

Tags are not written or recovered automatically. The separator | is removed from the replica names.

Parameters
  • obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
  • fname (str): Filename of the output file.
  • name (str): The name of the observable.
  • spec (str): Optional string that describes the contents of the file.
  • origin (str): Specify where the data has its origin.
  • symbol (list): A list of symbols that describe the observables to be written. May be empty.
  • who (str): Provide the name of the person that exports the data.
  • enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
  • gz (bool): If True, the output is a gzipped XML. If False, the output is a XML file.