diff --git a/docs/pyerrors/input/dobs.html b/docs/pyerrors/input/dobs.html index bc8e50c3..5f72dc97 100644 --- a/docs/pyerrors/input/dobs.html +++ b/docs/pyerrors/input/dobs.html @@ -470,502 +470,503 @@ 379 return res 380 381 -382# Reading (and writing) dobs is not yet working properly: -383# we have to loop over root[2:] because each entry is a dobs -384# But maybe this is just a problem with Ben's implementation +382# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py +383def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True): +384 """Import a list of Obs from a string in the Zeuthen dobs format. 385 -386# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py -387def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True): -388 """Import a list of Obs from a string in the Zeuthen dobs format. -389 -390 Tags are not written or recovered automatically. -391 -392 Parameters -393 ---------- -394 content : str -395 XML string containing the data -396 noemtpy : bool -397 If True, ensembles with no contribution to the Obs are not included. -398 If False, ensembles are included as written in the file, possibly with vanishing entries. -399 full_output : bool -400 If True, a dict containing auxiliary information and the data is returned. -401 If False, only the data is returned as list. -402 separatior_insertion: str, int or bool -403 str: replace all occurences of "separator_insertion" within the replica names -404 by "|%s" % (separator_insertion) when constructing the names of the replica. -405 int: Insert the separator "|" at the position given by separator_insertion. -406 True (default): separator "|" is inserted after len(ensname), assuming that the -407 ensemble name is a prefix to the replica name. -408 None or False: No separator is inserted. -409 """ -410 -411 root = et.fromstring(content) +386 Tags are not written or recovered automatically. +387 +388 Parameters +389 ---------- +390 content : str +391 XML string containing the data +392 noemtpy : bool +393 If True, ensembles with no contribution to the Obs are not included. +394 If False, ensembles are included as written in the file, possibly with vanishing entries. +395 full_output : bool +396 If True, a dict containing auxiliary information and the data is returned. +397 If False, only the data is returned as list. +398 separatior_insertion: str, int or bool +399 str: replace all occurences of "separator_insertion" within the replica names +400 by "|%s" % (separator_insertion) when constructing the names of the replica. +401 int: Insert the separator "|" at the position given by separator_insertion. +402 True (default): separator "|" is inserted after len(ensname), assuming that the +403 ensemble name is a prefix to the replica name. +404 None or False: No separator is inserted. +405 """ +406 +407 root = et.fromstring(content) +408 +409 _check(root.tag == 'OBSERVABLES') +410 _check(root[0].tag == 'SCHEMA') +411 version = root[0][1].text.strip() 412 -413 _check(root.tag == 'OBSERVABLES') -414 _check(root[0].tag == 'SCHEMA') -415 version = root[0][1].text.strip() -416 -417 _check(root[1].tag == 'origin') -418 file_origin = _etree_to_dict(root[1])['origin'] +413 _check(root[1].tag == 'origin') +414 file_origin = _etree_to_dict(root[1])['origin'] +415 +416 _check(root[2].tag == 'dobs') +417 +418 dobs = root[2] 419 -420 _check(root[2].tag == 'dobs') -421 -422 dobs = root[2] +420 descriptiond = {} +421 for i in range(3): +422 descriptiond[dobs[i].tag] = dobs[i].text.strip() 423 -424 descriptiond = {} -425 for i in range(3): -426 descriptiond[dobs[i].tag] = dobs[i].text.strip() -427 -428 _check(dobs[3].tag == 'array') -429 -430 symbol = [] -431 if dobs[3][1].tag == 'symbol': -432 symbol = dobs[3][1].text.strip() -433 descriptiond['symbol'] = symbol -434 mean = _import_array(dobs[3])[0] -435 -436 _check(dobs[4].tag == "ne") -437 ne = int(dobs[4].text.strip()) -438 _check(dobs[5].tag == "nc") -439 nc = int(dobs[5].text.strip()) -440 -441 idld = {} -442 deltad = {} -443 covd = {} -444 gradd = {} -445 names = [] -446 e_names = [] -447 enstags = {} -448 for k in range(6, len(list(dobs))): -449 if dobs[k].tag == "edata": -450 _check(dobs[k][0].tag == "enstag") -451 ename = dobs[k][0].text.strip() -452 e_names.append(ename) -453 _check(dobs[k][1].tag == "nr") -454 R = int(dobs[k][1].text.strip()) -455 for i in range(2, 2 + R): -456 deltas, rname, idx = _import_rdata(dobs[k][i]) -457 if separator_insertion is None or False: -458 pass -459 elif separator_insertion is True: -460 if rname.startswith(ename): -461 rname = rname[:len(ename)] + '|' + rname[len(ename):] -462 elif isinstance(separator_insertion, int): -463 rname = rname[:separator_insertion] + '|' + rname[separator_insertion:] -464 elif isinstance(separator_insertion, str): -465 rname = rname.replace(separator_insertion, "|%s" % (separator_insertion)) +424 _check(dobs[3].tag == 'array') +425 +426 symbol = [] +427 if dobs[3][1].tag == 'symbol': +428 symbol = dobs[3][1].text.strip() +429 descriptiond['symbol'] = symbol +430 mean = _import_array(dobs[3])[0] +431 +432 _check(dobs[4].tag == "ne") +433 ne = int(dobs[4].text.strip()) +434 _check(dobs[5].tag == "nc") +435 nc = int(dobs[5].text.strip()) +436 +437 idld = {} +438 deltad = {} +439 covd = {} +440 gradd = {} +441 names = [] +442 e_names = [] +443 enstags = {} +444 for k in range(6, len(list(dobs))): +445 if dobs[k].tag == "edata": +446 _check(dobs[k][0].tag == "enstag") +447 ename = dobs[k][0].text.strip() +448 e_names.append(ename) +449 _check(dobs[k][1].tag == "nr") +450 R = int(dobs[k][1].text.strip()) +451 for i in range(2, 2 + R): +452 deltas, rname, idx = _import_rdata(dobs[k][i]) +453 if separator_insertion is None or False: +454 pass +455 elif separator_insertion is True: +456 if rname.startswith(ename): +457 rname = rname[:len(ename)] + '|' + rname[len(ename):] +458 elif isinstance(separator_insertion, int): +459 rname = rname[:separator_insertion] + '|' + rname[separator_insertion:] +460 elif isinstance(separator_insertion, str): +461 rname = rname.replace(separator_insertion, "|%s" % (separator_insertion)) +462 else: +463 raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion)) +464 if '|' in rname: +465 new_ename = rname[:rname.index('|')] 466 else: -467 raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion)) -468 if '|' in rname: -469 new_ename = rname[:rname.index('|')] -470 else: -471 new_ename = ename -472 enstags[new_ename] = ename -473 idld[rname] = idx -474 deltad[rname] = deltas -475 names.append(rname) -476 elif dobs[k].tag == "cdata": -477 cname, cov, grad = _import_cdata(dobs[k]) -478 covd[cname] = cov -479 if grad.shape[1] == 1: -480 gradd[cname] = [grad for i in range(len(mean))] -481 else: -482 gradd[cname] = grad.T -483 else: -484 _check(False) -485 names = list(set(names)) +467 new_ename = ename +468 enstags[new_ename] = ename +469 idld[rname] = idx +470 deltad[rname] = deltas +471 names.append(rname) +472 elif dobs[k].tag == "cdata": +473 cname, cov, grad = _import_cdata(dobs[k]) +474 covd[cname] = cov +475 if grad.shape[1] == 1: +476 gradd[cname] = [grad for i in range(len(mean))] +477 else: +478 gradd[cname] = grad.T +479 else: +480 _check(False) +481 names = list(set(names)) +482 +483 for name in names: +484 for i in range(len(deltad[name])): +485 deltad[name][i] = np.array(deltad[name][i]) + mean[i] 486 -487 for name in names: -488 for i in range(len(deltad[name])): -489 deltad[name][i] = np.array(deltad[name][i]) + mean[i] -490 -491 res = [] -492 for i in range(len(mean)): -493 deltas = [] -494 idl = [] -495 obs_names = [] -496 for name in names: -497 h = np.unique(deltad[name][i]) -498 if len(h) == 1 and np.all(h == mean[i]) and noempty: -499 continue -500 deltas.append(deltad[name][i]) -501 obs_names.append(name) -502 idl.append(idld[name]) -503 res.append(Obs(deltas, obs_names, idl=idl)) -504 print(mean, 'vs', res) -505 _check(len(e_names) == ne) -506 -507 cnames = list(covd.keys()) -508 for i in range(len(res)): -509 new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames} -510 if noempty: -511 for name in cnames: -512 if np.all(new_covobs[name].grad == 0): -513 del new_covobs[name] -514 cnames_loc = list(new_covobs.keys()) -515 else: -516 cnames_loc = cnames -517 for name in cnames_loc: -518 res[i].names.append(name) -519 res[i].shape[name] = 1 -520 res[i].idl[name] = [] -521 res[i]._covobs = new_covobs -522 -523 if symbol: -524 for i in range(len(res)): -525 res[i].tag = symbol[i] -526 if res[i].tag == 'None': -527 res[i].tag = None -528 if not noempty: -529 _check(len(res[0].covobs.keys()) == nc) -530 if full_output: -531 retd = {} -532 tool = file_origin.get('tool', None) -533 if tool: -534 program = tool['name'] + ' ' + tool['version'] -535 else: -536 program = '' -537 retd['program'] = program -538 retd['version'] = version -539 retd['who'] = file_origin['who'] -540 retd['date'] = file_origin['date'] -541 retd['host'] = file_origin['host'] -542 retd['description'] = descriptiond -543 retd['enstags'] = enstags -544 retd['obsdata'] = res -545 return retd -546 else: -547 return res +487 res = [] +488 for i in range(len(mean)): +489 deltas = [] +490 idl = [] +491 obs_names = [] +492 for name in names: +493 h = np.unique(deltad[name][i]) +494 if len(h) == 1 and np.all(h == mean[i]) and noempty: +495 continue +496 deltas.append(deltad[name][i]) +497 obs_names.append(name) +498 idl.append(idld[name]) +499 res.append(Obs(deltas, obs_names, idl=idl)) +500 res[-1]._value = mean[i] +501 _check(len(e_names) == ne) +502 +503 cnames = list(covd.keys()) +504 for i in range(len(res)): +505 new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames} +506 if noempty: +507 for name in cnames: +508 if np.all(new_covobs[name].grad == 0): +509 del new_covobs[name] +510 cnames_loc = list(new_covobs.keys()) +511 else: +512 cnames_loc = cnames +513 for name in cnames_loc: +514 res[i].names.append(name) +515 res[i].shape[name] = 1 +516 res[i].idl[name] = [] +517 res[i]._covobs = new_covobs +518 +519 if symbol: +520 for i in range(len(res)): +521 res[i].tag = symbol[i] +522 if res[i].tag == 'None': +523 res[i].tag = None +524 if not noempty: +525 _check(len(res[0].covobs.keys()) == nc) +526 if full_output: +527 retd = {} +528 tool = file_origin.get('tool', None) +529 if tool: +530 program = tool['name'] + ' ' + tool['version'] +531 else: +532 program = '' +533 retd['program'] = program +534 retd['version'] = version +535 retd['who'] = file_origin['who'] +536 retd['date'] = file_origin['date'] +537 retd['host'] = file_origin['host'] +538 retd['description'] = descriptiond +539 retd['enstags'] = enstags +540 retd['obsdata'] = res +541 return retd +542 else: +543 return res +544 +545 +546def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True): +547 """Import a list of Obs from an xml.gz file in the Zeuthen dobs format. 548 -549 -550def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True): -551 """Import a list of Obs from an xml.gz file in the Zeuthen dobs format. -552 -553 Tags are not written or recovered automatically. -554 -555 Parameters -556 ---------- -557 fname : str -558 Filename of the input file. -559 noemtpy : bool -560 If True, ensembles with no contribution to the Obs are not included. -561 If False, ensembles are included as written in the file. -562 full_output : bool -563 If True, a dict containing auxiliary information and the data is returned. -564 If False, only the data is returned as list. -565 gz : bool -566 If True, assumes that data is gzipped. If False, assumes XML file. -567 separatior_insertion: str, int or bool -568 str: replace all occurences of "separator_insertion" within the replica names -569 by "|%s" % (separator_insertion) when constructing the names of the replica. -570 int: Insert the separator "|" at the position given by separator_insertion. -571 True (default): separator "|" is inserted after len(ensname), assuming that the -572 ensemble name is a prefix to the replica name. -573 None or False: No separator is inserted. -574 """ -575 -576 if not fname.endswith('.xml') and not fname.endswith('.gz'): -577 fname += '.xml' -578 if gz: -579 if not fname.endswith('.gz'): -580 fname += '.gz' -581 with gzip.open(fname, 'r') as fin: -582 content = fin.read().decode('utf-8') -583 else: -584 if fname.endswith('.gz'): -585 warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) -586 with open(fname, 'r', encoding='utf-8') as fin: -587 content = fin.read() -588 -589 # open and read gzipped xml file -590 infile = gzip.open(fname) -591 content = infile.read() -592 -593 return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion) -594 -595 -596def _dobsdict_to_xmlstring(d): -597 if isinstance(d, dict): -598 iters = '' -599 for k in d: -600 if k.startswith('#value'): -601 for li in d[k]: -602 iters += li -603 return iters + '\n' -604 elif k.startswith('#'): -605 for li in d[k]: -606 iters += li -607 iters = '<array>\n' + iters + '<%sarray>\n' % ('/') -608 return iters -609 if isinstance(d[k], dict): -610 iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k) -611 elif isinstance(d[k], str): -612 if len(d[k]) > 100: -613 iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k) -614 else: -615 iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k) -616 elif isinstance(d[k], list): -617 tmps = '' -618 if k in ['edata', 'cdata']: -619 for i in range(len(d[k])): -620 tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k) -621 else: -622 for i in range(len(d[k])): -623 tmps += _dobsdict_to_xmlstring(d[k][i]) -624 iters += tmps -625 elif isinstance(d[k], (int, float)): -626 iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k) -627 elif not d[k]: -628 return '\n' -629 else: -630 raise Exception('Type', type(d[k]), 'not supported in export!') -631 else: -632 raise Exception('Type', type(d), 'not supported in export!') -633 return iters -634 -635 -636def _dobsdict_to_xmlstring_spaces(d, space=' '): -637 s = _dobsdict_to_xmlstring(d) -638 o = '' -639 c = 0 -640 cm = False -641 for li in s.split('\n'): -642 if li.startswith('<%s' % ('/')): -643 c -= 1 -644 cm = True -645 for i in range(c): -646 o += space -647 o += li + '\n' -648 if li.startswith('<') and not cm: -649 if not '<%s' % ('/') in li: -650 c += 1 -651 cm = False -652 return o -653 -654 -655def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None): -656 """Generate the string for the export of a list of Obs or structures containing Obs -657 to a .xml.gz file according to the Zeuthen dobs format. -658 -659 Tags are not written or recovered automatically. The separator |is removed from the replica names. -660 -661 Parameters -662 ---------- -663 obsl : list -664 List of Obs that will be exported. -665 The Obs inside a structure do not have to be defined on the same set of configurations, -666 but the storage requirement is increased, if this is not the case. -667 name : str -668 The name of the observable. -669 spec : str -670 Optional string that describes the contents of the file. -671 origin : str -672 Specify where the data has its origin. -673 symbol : list -674 A list of symbols that describe the observables to be written. May be empty. -675 who : str -676 Provide the name of the person that exports the data. -677 enstags : dict -678 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} -679 Otherwise, the ensemble name is used. -680 """ -681 if enstags is None: -682 enstags = {} -683 od = {} -684 r_names = [] -685 for o in obsl: -686 r_names += [name for name in o.names if name.split('|')[0] in o.mc_names] -687 r_names = sorted(set(r_names)) -688 mc_names = sorted(set([n.split('|')[0] for n in r_names])) -689 for tmpname in mc_names: -690 if tmpname not in enstags: -691 enstags[tmpname] = tmpname -692 ne = len(set(mc_names)) -693 cov_names = [] -694 for o in obsl: -695 cov_names += list(o.cov_names) -696 cov_names = sorted(set(cov_names)) -697 nc = len(set(cov_names)) -698 od['OBSERVABLES'] = {} -699 od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'} -700 if who is None: -701 who = getpass.getuser() -702 od['OBSERVABLES']['origin'] = { -703 'who': who, -704 'date': str(datetime.datetime.now())[:-7], -705 'host': socket.gethostname(), -706 'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}} -707 od['OBSERVABLES']['dobs'] = {} -708 pd = od['OBSERVABLES']['dobs'] -709 pd['spec'] = spec -710 pd['origin'] = origin -711 pd['name'] = name -712 pd['array'] = {} -713 pd['array']['id'] = 'val' -714 pd['array']['layout'] = '1 f%d' % (len(obsl)) -715 osymbol = '' -716 if symbol: -717 if not isinstance(symbol, list): -718 raise Exception('Symbol has to be a list!') -719 if not (len(symbol) == 0 or len(symbol) == len(obsl)): -720 raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl))) -721 osymbol = symbol[0] -722 for s in symbol[1:]: -723 osymbol += ' %s' % s -724 pd['array']['symbol'] = osymbol -725 -726 pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])] -727 pd['ne'] = '%d' % (ne) -728 pd['nc'] = '%d' % (nc) -729 pd['edata'] = [] -730 for name in mc_names: -731 ed = {} -732 ed['enstag'] = enstags[name] -733 onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)]) -734 nr = len(onames) -735 ed['nr'] = nr -736 ed[''] = [] -737 -738 for r in range(nr): -739 ad = {} -740 repname = onames[r] -741 ad['id'] = repname.replace('|', '') -742 idx = _merge_idx([o.idl.get(repname, []) for o in obsl]) -743 Nconf = len(idx) -744 layout = '%d i f%d' % (Nconf, len(obsl)) -745 ad['layout'] = layout -746 data = '' -747 counters = [0 for o in obsl] -748 for ci in idx: -749 data += '%d ' % ci -750 for oi in range(len(obsl)): -751 o = obsl[oi] -752 if repname in o.idl: -753 if counters[oi] < 0: -754 data += '0 ' -755 continue -756 if o.idl[repname][counters[oi]] == ci: -757 num = o.deltas[repname][counters[oi]] -758 if num == 0: -759 data += '0 ' -760 else: -761 data += '%1.16e ' % (num) -762 counters[oi] += 1 -763 if counters[oi] >= len(o.idl[repname]): -764 counters[oi] = -1 -765 else: -766 data += '0 ' -767 else: -768 data += '0 ' -769 data += '\n' -770 ad['#data'] = data -771 ed[''].append(ad) -772 pd['edata'].append(ed) -773 -774 allcov = {} -775 for o in obsl: -776 for name in o.cov_names: -777 if name in allcov: -778 if not np.array_equal(allcov[name], o.covobs[name].cov): -779 raise Exception('Inconsistent covariance matrices for %s!' % (name)) -780 else: -781 allcov[name] = o.covobs[name].cov -782 pd['cdata'] = [] -783 for name in cov_names: -784 cd = {} -785 cd['id'] = name -786 -787 covd = {'id': 'cov'} -788 if allcov[name].shape == (): -789 ncov = 1 -790 covd['layout'] = '1 1 f' -791 covd['#data'] = '%1.14e' % (allcov[name]) -792 else: -793 shape = allcov[name].shape -794 assert (shape[0] == shape[1]) -795 ncov = shape[0] -796 covd['layout'] = '%d %d f' % (ncov, ncov) -797 ds = '' -798 for i in range(ncov): -799 for j in range(ncov): -800 val = allcov[name][i][j] -801 if val == 0: -802 ds += '0 ' -803 else: -804 ds += '%1.14e ' % (val) -805 ds += '\n' -806 covd['#data'] = ds -807 -808 gradd = {'id': 'grad'} -809 gradd['layout'] = '%d f%d' % (ncov, len(obsl)) -810 ds = '' -811 for i in range(ncov): -812 for o in obsl: -813 if name in o.covobs: -814 val = o.covobs[name].grad[i] -815 if val != 0: -816 ds += '%1.14e ' % (val) -817 else: -818 ds += '0 ' -819 else: -820 ds += '0 ' -821 gradd['#data'] = ds -822 cd['array'] = [covd, gradd] -823 pd['cdata'].append(cd) -824 -825 rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od) -826 -827 return rs -828 +549 Tags are not written or recovered automatically. +550 +551 Parameters +552 ---------- +553 fname : str +554 Filename of the input file. +555 noemtpy : bool +556 If True, ensembles with no contribution to the Obs are not included. +557 If False, ensembles are included as written in the file. +558 full_output : bool +559 If True, a dict containing auxiliary information and the data is returned. +560 If False, only the data is returned as list. +561 gz : bool +562 If True, assumes that data is gzipped. If False, assumes XML file. +563 separatior_insertion: str, int or bool +564 str: replace all occurences of "separator_insertion" within the replica names +565 by "|%s" % (separator_insertion) when constructing the names of the replica. +566 int: Insert the separator "|" at the position given by separator_insertion. +567 True (default): separator "|" is inserted after len(ensname), assuming that the +568 ensemble name is a prefix to the replica name. +569 None or False: No separator is inserted. +570 """ +571 +572 if not fname.endswith('.xml') and not fname.endswith('.gz'): +573 fname += '.xml' +574 if gz: +575 if not fname.endswith('.gz'): +576 fname += '.gz' +577 with gzip.open(fname, 'r') as fin: +578 content = fin.read() +579 else: +580 if fname.endswith('.gz'): +581 warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) +582 with open(fname, 'r') as fin: +583 content = fin.read() +584 +585 return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion) +586 +587 +588def _dobsdict_to_xmlstring(d): +589 if isinstance(d, dict): +590 iters = '' +591 for k in d: +592 if k.startswith('#value'): +593 for li in d[k]: +594 iters += li +595 return iters + '\n' +596 elif k.startswith('#'): +597 for li in d[k]: +598 iters += li +599 iters = '<array>\n' + iters + '<%sarray>\n' % ('/') +600 return iters +601 if isinstance(d[k], dict): +602 iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k) +603 elif isinstance(d[k], str): +604 if len(d[k]) > 100: +605 iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k) +606 else: +607 iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k) +608 elif isinstance(d[k], list): +609 tmps = '' +610 if k in ['edata', 'cdata']: +611 for i in range(len(d[k])): +612 tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k) +613 else: +614 for i in range(len(d[k])): +615 tmps += _dobsdict_to_xmlstring(d[k][i]) +616 iters += tmps +617 elif isinstance(d[k], (int, float)): +618 iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k) +619 elif not d[k]: +620 return '\n' +621 else: +622 raise Exception('Type', type(d[k]), 'not supported in export!') +623 else: +624 raise Exception('Type', type(d), 'not supported in export!') +625 return iters +626 +627 +628def _dobsdict_to_xmlstring_spaces(d, space=' '): +629 s = _dobsdict_to_xmlstring(d) +630 o = '' +631 c = 0 +632 cm = False +633 for li in s.split('\n'): +634 if li.startswith('<%s' % ('/')): +635 c -= 1 +636 cm = True +637 for i in range(c): +638 o += space +639 o += li + '\n' +640 if li.startswith('<') and not cm: +641 if not '<%s' % ('/') in li: +642 c += 1 +643 cm = False +644 return o +645 +646 +647def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None): +648 """Generate the string for the export of a list of Obs or structures containing Obs +649 to a .xml.gz file according to the Zeuthen dobs format. +650 +651 Tags are not written or recovered automatically. The separator |is removed from the replica names. +652 +653 Parameters +654 ---------- +655 obsl : list +656 List of Obs that will be exported. +657 The Obs inside a structure do not have to be defined on the same set of configurations, +658 but the storage requirement is increased, if this is not the case. +659 name : str +660 The name of the observable. +661 spec : str +662 Optional string that describes the contents of the file. +663 origin : str +664 Specify where the data has its origin. +665 symbol : list +666 A list of symbols that describe the observables to be written. May be empty. +667 who : str +668 Provide the name of the person that exports the data. +669 enstags : dict +670 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} +671 Otherwise, the ensemble name is used. +672 """ +673 if enstags is None: +674 enstags = {} +675 od = {} +676 r_names = [] +677 for o in obsl: +678 r_names += [name for name in o.names if name.split('|')[0] in o.mc_names] +679 r_names = sorted(set(r_names)) +680 mc_names = sorted(set([n.split('|')[0] for n in r_names])) +681 for tmpname in mc_names: +682 if tmpname not in enstags: +683 enstags[tmpname] = tmpname +684 ne = len(set(mc_names)) +685 cov_names = [] +686 for o in obsl: +687 cov_names += list(o.cov_names) +688 cov_names = sorted(set(cov_names)) +689 nc = len(set(cov_names)) +690 od['OBSERVABLES'] = {} +691 od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'} +692 if who is None: +693 who = getpass.getuser() +694 od['OBSERVABLES']['origin'] = { +695 'who': who, +696 'date': str(datetime.datetime.now())[:-7], +697 'host': socket.gethostname(), +698 'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}} +699 od['OBSERVABLES']['dobs'] = {} +700 pd = od['OBSERVABLES']['dobs'] +701 pd['spec'] = spec +702 pd['origin'] = origin +703 pd['name'] = name +704 pd['array'] = {} +705 pd['array']['id'] = 'val' +706 pd['array']['layout'] = '1 f%d' % (len(obsl)) +707 osymbol = '' +708 if symbol: +709 if not isinstance(symbol, list): +710 raise Exception('Symbol has to be a list!') +711 if not (len(symbol) == 0 or len(symbol) == len(obsl)): +712 raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl))) +713 osymbol = symbol[0] +714 for s in symbol[1:]: +715 osymbol += ' %s' % s +716 pd['array']['symbol'] = osymbol +717 +718 pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])] +719 pd['ne'] = '%d' % (ne) +720 pd['nc'] = '%d' % (nc) +721 pd['edata'] = [] +722 for name in mc_names: +723 ed = {} +724 ed['enstag'] = enstags[name] +725 onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)]) +726 nr = len(onames) +727 ed['nr'] = nr +728 ed[''] = [] +729 +730 for r in range(nr): +731 ad = {} +732 repname = onames[r] +733 ad['id'] = repname.replace('|', '') +734 idx = _merge_idx([o.idl.get(repname, []) for o in obsl]) +735 Nconf = len(idx) +736 layout = '%d i f%d' % (Nconf, len(obsl)) +737 ad['layout'] = layout +738 data = '' +739 counters = [0 for o in obsl] +740 offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl] +741 for ci in idx: +742 data += '%d ' % ci +743 for oi in range(len(obsl)): +744 o = obsl[oi] +745 if repname in o.idl: +746 if counters[oi] < 0: +747 num = offsets[oi] +748 if num == 0: +749 data += '0 ' +750 else: +751 data += '%1.16e ' % (num) +752 continue +753 if o.idl[repname][counters[oi]] == ci: +754 num = o.deltas[repname][counters[oi]] + offsets[oi] +755 if num == 0: +756 data += '0 ' +757 else: +758 data += '%1.16e ' % (num) +759 counters[oi] += 1 +760 if counters[oi] >= len(o.idl[repname]): +761 counters[oi] = -1 +762 else: +763 num = offsets[oi] +764 if num == 0: +765 data += '0 ' +766 else: +767 data += '%1.16e ' % (num) +768 else: +769 data += '0 ' +770 data += '\n' +771 ad['#data'] = data +772 ed[''].append(ad) +773 pd['edata'].append(ed) +774 +775 allcov = {} +776 for o in obsl: +777 for cname in o.cov_names: +778 if cname in allcov: +779 if not np.array_equal(allcov[cname], o.covobs[cname].cov): +780 raise Exception('Inconsistent covariance matrices for %s!' % (cname)) +781 else: +782 allcov[cname] = o.covobs[cname].cov +783 pd['cdata'] = [] +784 for cname in cov_names: +785 cd = {} +786 cd['id'] = cname +787 +788 covd = {'id': 'cov'} +789 if allcov[cname].shape == (): +790 ncov = 1 +791 covd['layout'] = '1 1 f' +792 covd['#data'] = '%1.14e' % (allcov[cname]) +793 else: +794 shape = allcov[cname].shape +795 assert (shape[0] == shape[1]) +796 ncov = shape[0] +797 covd['layout'] = '%d %d f' % (ncov, ncov) +798 ds = '' +799 for i in range(ncov): +800 for j in range(ncov): +801 val = allcov[cname][i][j] +802 if val == 0: +803 ds += '0 ' +804 else: +805 ds += '%1.14e ' % (val) +806 ds += '\n' +807 covd['#data'] = ds +808 +809 gradd = {'id': 'grad'} +810 gradd['layout'] = '%d f%d' % (ncov, len(obsl)) +811 ds = '' +812 for i in range(ncov): +813 for o in obsl: +814 if cname in o.covobs: +815 val = o.covobs[cname].grad[i] +816 if val != 0: +817 ds += '%1.14e ' % (val) +818 else: +819 ds += '0 ' +820 else: +821 ds += '0 ' +822 gradd['#data'] = ds +823 cd['array'] = [covd, gradd] +824 pd['cdata'].append(cd) +825 +826 rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od) +827 +828 return rs 829 -830def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True): -831 """Export a list of Obs or structures containing Obs to a .xml.gz file -832 according to the Zeuthen dobs format. -833 -834 Tags are not written or recovered automatically. The separator | is removed from the replica names. -835 -836 Parameters -837 ---------- -838 obsl : list -839 List of Obs that will be exported. -840 The Obs inside a structure do not have to be defined on the same set of configurations, -841 but the storage requirement is increased, if this is not the case. -842 fname : str -843 Filename of the output file. -844 name : str -845 The name of the observable. -846 spec : str -847 Optional string that describes the contents of the file. -848 origin : str -849 Specify where the data has its origin. -850 symbol : list -851 A list of symbols that describe the observables to be written. May be empty. -852 who : str -853 Provide the name of the person that exports the data. -854 enstags : dict -855 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} -856 Otherwise, the ensemble name is used. -857 gz : bool -858 If True, the output is a gzipped XML. If False, the output is a XML file. -859 """ -860 if enstags is None: -861 enstags = {} -862 -863 dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags) -864 -865 if not fname.endswith('.xml') and not fname.endswith('.gz'): -866 fname += '.xml' -867 -868 if gz: -869 if not fname.endswith('.gz'): -870 fname += '.gz' -871 -872 fp = gzip.open(fname, 'wb') -873 fp.write(dobsstring.encode('utf-8')) -874 else: -875 fp = open(fname, 'w', encoding='utf-8') -876 fp.write(dobsstring) -877 fp.close() +830 +831def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True): +832 """Export a list of Obs or structures containing Obs to a .xml.gz file +833 according to the Zeuthen dobs format. +834 +835 Tags are not written or recovered automatically. The separator | is removed from the replica names. +836 +837 Parameters +838 ---------- +839 obsl : list +840 List of Obs that will be exported. +841 The Obs inside a structure do not have to be defined on the same set of configurations, +842 but the storage requirement is increased, if this is not the case. +843 fname : str +844 Filename of the output file. +845 name : str +846 The name of the observable. +847 spec : str +848 Optional string that describes the contents of the file. +849 origin : str +850 Specify where the data has its origin. +851 symbol : list +852 A list of symbols that describe the observables to be written. May be empty. +853 who : str +854 Provide the name of the person that exports the data. +855 enstags : dict +856 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} +857 Otherwise, the ensemble name is used. +858 gz : bool +859 If True, the output is a gzipped XML. If False, the output is a XML file. +860 """ +861 if enstags is None: +862 enstags = {} +863 +864 dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags) +865 +866 if not fname.endswith('.xml') and not fname.endswith('.gz'): +867 fname += '.xml' +868 +869 if gz: +870 if not fname.endswith('.gz'): +871 fname += '.gz' +872 +873 fp = gzip.open(fname, 'wb') +874 fp.write(dobsstring.encode('utf-8')) +875 else: +876 fp = open(fname, 'w', encoding='utf-8') +877 fp.write(dobsstring) +878 fp.close() @@ -1315,167 +1316,167 @@ None (default): Replica names remain unchanged. -
388def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
-389    """Import a list of Obs from a string in the Zeuthen dobs format.
-390
-391    Tags are not written or recovered automatically.
-392
-393    Parameters
-394    ----------
-395    content : str
-396        XML string containing the data
-397    noemtpy : bool
-398        If True, ensembles with no contribution to the Obs are not included.
-399        If False, ensembles are included as written in the file, possibly with vanishing entries.
-400    full_output : bool
-401        If True, a dict containing auxiliary information and the data is returned.
-402        If False, only the data is returned as list.
-403    separatior_insertion: str, int or bool
-404        str: replace all occurences of "separator_insertion" within the replica names
-405        by "|%s" % (separator_insertion) when constructing the names of the replica.
-406        int: Insert the separator "|" at the position given by separator_insertion.
-407        True (default): separator "|" is inserted after len(ensname), assuming that the
-408        ensemble name is a prefix to the replica name.
-409        None or False: No separator is inserted.
-410    """
-411
-412    root = et.fromstring(content)
+            
384def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
+385    """Import a list of Obs from a string in the Zeuthen dobs format.
+386
+387    Tags are not written or recovered automatically.
+388
+389    Parameters
+390    ----------
+391    content : str
+392        XML string containing the data
+393    noemtpy : bool
+394        If True, ensembles with no contribution to the Obs are not included.
+395        If False, ensembles are included as written in the file, possibly with vanishing entries.
+396    full_output : bool
+397        If True, a dict containing auxiliary information and the data is returned.
+398        If False, only the data is returned as list.
+399    separatior_insertion: str, int or bool
+400        str: replace all occurences of "separator_insertion" within the replica names
+401        by "|%s" % (separator_insertion) when constructing the names of the replica.
+402        int: Insert the separator "|" at the position given by separator_insertion.
+403        True (default): separator "|" is inserted after len(ensname), assuming that the
+404        ensemble name is a prefix to the replica name.
+405        None or False: No separator is inserted.
+406    """
+407
+408    root = et.fromstring(content)
+409
+410    _check(root.tag == 'OBSERVABLES')
+411    _check(root[0].tag == 'SCHEMA')
+412    version = root[0][1].text.strip()
 413
-414    _check(root.tag == 'OBSERVABLES')
-415    _check(root[0].tag == 'SCHEMA')
-416    version = root[0][1].text.strip()
-417
-418    _check(root[1].tag == 'origin')
-419    file_origin = _etree_to_dict(root[1])['origin']
+414    _check(root[1].tag == 'origin')
+415    file_origin = _etree_to_dict(root[1])['origin']
+416
+417    _check(root[2].tag == 'dobs')
+418
+419    dobs = root[2]
 420
-421    _check(root[2].tag == 'dobs')
-422
-423    dobs = root[2]
+421    descriptiond = {}
+422    for i in range(3):
+423        descriptiond[dobs[i].tag] = dobs[i].text.strip()
 424
-425    descriptiond = {}
-426    for i in range(3):
-427        descriptiond[dobs[i].tag] = dobs[i].text.strip()
-428
-429    _check(dobs[3].tag == 'array')
-430
-431    symbol = []
-432    if dobs[3][1].tag == 'symbol':
-433        symbol = dobs[3][1].text.strip()
-434        descriptiond['symbol'] = symbol
-435    mean = _import_array(dobs[3])[0]
-436
-437    _check(dobs[4].tag == "ne")
-438    ne = int(dobs[4].text.strip())
-439    _check(dobs[5].tag == "nc")
-440    nc = int(dobs[5].text.strip())
-441
-442    idld = {}
-443    deltad = {}
-444    covd = {}
-445    gradd = {}
-446    names = []
-447    e_names = []
-448    enstags = {}
-449    for k in range(6, len(list(dobs))):
-450        if dobs[k].tag == "edata":
-451            _check(dobs[k][0].tag == "enstag")
-452            ename = dobs[k][0].text.strip()
-453            e_names.append(ename)
-454            _check(dobs[k][1].tag == "nr")
-455            R = int(dobs[k][1].text.strip())
-456            for i in range(2, 2 + R):
-457                deltas, rname, idx = _import_rdata(dobs[k][i])
-458                if separator_insertion is None or False:
-459                    pass
-460                elif separator_insertion is True:
-461                    if rname.startswith(ename):
-462                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
-463                elif isinstance(separator_insertion, int):
-464                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
-465                elif isinstance(separator_insertion, str):
-466                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
+425    _check(dobs[3].tag == 'array')
+426
+427    symbol = []
+428    if dobs[3][1].tag == 'symbol':
+429        symbol = dobs[3][1].text.strip()
+430        descriptiond['symbol'] = symbol
+431    mean = _import_array(dobs[3])[0]
+432
+433    _check(dobs[4].tag == "ne")
+434    ne = int(dobs[4].text.strip())
+435    _check(dobs[5].tag == "nc")
+436    nc = int(dobs[5].text.strip())
+437
+438    idld = {}
+439    deltad = {}
+440    covd = {}
+441    gradd = {}
+442    names = []
+443    e_names = []
+444    enstags = {}
+445    for k in range(6, len(list(dobs))):
+446        if dobs[k].tag == "edata":
+447            _check(dobs[k][0].tag == "enstag")
+448            ename = dobs[k][0].text.strip()
+449            e_names.append(ename)
+450            _check(dobs[k][1].tag == "nr")
+451            R = int(dobs[k][1].text.strip())
+452            for i in range(2, 2 + R):
+453                deltas, rname, idx = _import_rdata(dobs[k][i])
+454                if separator_insertion is None or False:
+455                    pass
+456                elif separator_insertion is True:
+457                    if rname.startswith(ename):
+458                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
+459                elif isinstance(separator_insertion, int):
+460                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
+461                elif isinstance(separator_insertion, str):
+462                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
+463                else:
+464                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
+465                if '|' in rname:
+466                    new_ename = rname[:rname.index('|')]
 467                else:
-468                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
-469                if '|' in rname:
-470                    new_ename = rname[:rname.index('|')]
-471                else:
-472                    new_ename = ename
-473                enstags[new_ename] = ename
-474                idld[rname] = idx
-475                deltad[rname] = deltas
-476                names.append(rname)
-477        elif dobs[k].tag == "cdata":
-478            cname, cov, grad = _import_cdata(dobs[k])
-479            covd[cname] = cov
-480            if grad.shape[1] == 1:
-481                gradd[cname] = [grad for i in range(len(mean))]
-482            else:
-483                gradd[cname] = grad.T
-484        else:
-485            _check(False)
-486    names = list(set(names))
+468                    new_ename = ename
+469                enstags[new_ename] = ename
+470                idld[rname] = idx
+471                deltad[rname] = deltas
+472                names.append(rname)
+473        elif dobs[k].tag == "cdata":
+474            cname, cov, grad = _import_cdata(dobs[k])
+475            covd[cname] = cov
+476            if grad.shape[1] == 1:
+477                gradd[cname] = [grad for i in range(len(mean))]
+478            else:
+479                gradd[cname] = grad.T
+480        else:
+481            _check(False)
+482    names = list(set(names))
+483
+484    for name in names:
+485        for i in range(len(deltad[name])):
+486            deltad[name][i] = np.array(deltad[name][i]) + mean[i]
 487
-488    for name in names:
-489        for i in range(len(deltad[name])):
-490            deltad[name][i] = np.array(deltad[name][i]) + mean[i]
-491
-492    res = []
-493    for i in range(len(mean)):
-494        deltas = []
-495        idl = []
-496        obs_names = []
-497        for name in names:
-498            h = np.unique(deltad[name][i])
-499            if len(h) == 1 and np.all(h == mean[i]) and noempty:
-500                continue
-501            deltas.append(deltad[name][i])
-502            obs_names.append(name)
-503            idl.append(idld[name])
-504        res.append(Obs(deltas, obs_names, idl=idl))
-505    print(mean, 'vs', res)
-506    _check(len(e_names) == ne)
-507
-508    cnames = list(covd.keys())
-509    for i in range(len(res)):
-510        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
-511        if noempty:
-512            for name in cnames:
-513                if np.all(new_covobs[name].grad == 0):
-514                    del new_covobs[name]
-515            cnames_loc = list(new_covobs.keys())
-516        else:
-517            cnames_loc = cnames
-518        for name in cnames_loc:
-519            res[i].names.append(name)
-520            res[i].shape[name] = 1
-521            res[i].idl[name] = []
-522        res[i]._covobs = new_covobs
-523
-524    if symbol:
-525        for i in range(len(res)):
-526            res[i].tag = symbol[i]
-527            if res[i].tag == 'None':
-528                res[i].tag = None
-529    if not noempty:
-530        _check(len(res[0].covobs.keys()) == nc)
-531    if full_output:
-532        retd = {}
-533        tool = file_origin.get('tool', None)
-534        if tool:
-535            program = tool['name'] + ' ' + tool['version']
-536        else:
-537            program = ''
-538        retd['program'] = program
-539        retd['version'] = version
-540        retd['who'] = file_origin['who']
-541        retd['date'] = file_origin['date']
-542        retd['host'] = file_origin['host']
-543        retd['description'] = descriptiond
-544        retd['enstags'] = enstags
-545        retd['obsdata'] = res
-546        return retd
-547    else:
-548        return res
+488    res = []
+489    for i in range(len(mean)):
+490        deltas = []
+491        idl = []
+492        obs_names = []
+493        for name in names:
+494            h = np.unique(deltad[name][i])
+495            if len(h) == 1 and np.all(h == mean[i]) and noempty:
+496                continue
+497            deltas.append(deltad[name][i])
+498            obs_names.append(name)
+499            idl.append(idld[name])
+500        res.append(Obs(deltas, obs_names, idl=idl))
+501        res[-1]._value = mean[i]
+502    _check(len(e_names) == ne)
+503
+504    cnames = list(covd.keys())
+505    for i in range(len(res)):
+506        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
+507        if noempty:
+508            for name in cnames:
+509                if np.all(new_covobs[name].grad == 0):
+510                    del new_covobs[name]
+511            cnames_loc = list(new_covobs.keys())
+512        else:
+513            cnames_loc = cnames
+514        for name in cnames_loc:
+515            res[i].names.append(name)
+516            res[i].shape[name] = 1
+517            res[i].idl[name] = []
+518        res[i]._covobs = new_covobs
+519
+520    if symbol:
+521        for i in range(len(res)):
+522            res[i].tag = symbol[i]
+523            if res[i].tag == 'None':
+524                res[i].tag = None
+525    if not noempty:
+526        _check(len(res[0].covobs.keys()) == nc)
+527    if full_output:
+528        retd = {}
+529        tool = file_origin.get('tool', None)
+530        if tool:
+531            program = tool['name'] + ' ' + tool['version']
+532        else:
+533            program = ''
+534        retd['program'] = program
+535        retd['version'] = version
+536        retd['who'] = file_origin['who']
+537        retd['date'] = file_origin['date']
+538        retd['host'] = file_origin['host']
+539        retd['description'] = descriptiond
+540        retd['enstags'] = enstags
+541        retd['obsdata'] = res
+542        return retd
+543    else:
+544        return res
 
@@ -1517,50 +1518,46 @@ None or False: No separator is inserted.
-
551def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
-552    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
-553
-554    Tags are not written or recovered automatically.
-555
-556    Parameters
-557    ----------
-558    fname : str
-559        Filename of the input file.
-560    noemtpy : bool
-561        If True, ensembles with no contribution to the Obs are not included.
-562        If False, ensembles are included as written in the file.
-563    full_output : bool
-564        If True, a dict containing auxiliary information and the data is returned.
-565        If False, only the data is returned as list.
-566    gz : bool
-567        If True, assumes that data is gzipped. If False, assumes XML file.
-568    separatior_insertion: str, int or bool
-569        str: replace all occurences of "separator_insertion" within the replica names
-570        by "|%s" % (separator_insertion) when constructing the names of the replica.
-571        int: Insert the separator "|" at the position given by separator_insertion.
-572        True (default): separator "|" is inserted after len(ensname), assuming that the
-573        ensemble name is a prefix to the replica name.
-574        None or False: No separator is inserted.
-575    """
-576
-577    if not fname.endswith('.xml') and not fname.endswith('.gz'):
-578        fname += '.xml'
-579    if gz:
-580        if not fname.endswith('.gz'):
-581            fname += '.gz'
-582        with gzip.open(fname, 'r') as fin:
-583            content = fin.read().decode('utf-8')
-584    else:
-585        if fname.endswith('.gz'):
-586            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
-587        with open(fname, 'r', encoding='utf-8') as fin:
-588            content = fin.read()
-589
-590    # open and read gzipped xml file
-591    infile = gzip.open(fname)
-592    content = infile.read()
-593
-594    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
+            
547def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
+548    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
+549
+550    Tags are not written or recovered automatically.
+551
+552    Parameters
+553    ----------
+554    fname : str
+555        Filename of the input file.
+556    noemtpy : bool
+557        If True, ensembles with no contribution to the Obs are not included.
+558        If False, ensembles are included as written in the file.
+559    full_output : bool
+560        If True, a dict containing auxiliary information and the data is returned.
+561        If False, only the data is returned as list.
+562    gz : bool
+563        If True, assumes that data is gzipped. If False, assumes XML file.
+564    separatior_insertion: str, int or bool
+565        str: replace all occurences of "separator_insertion" within the replica names
+566        by "|%s" % (separator_insertion) when constructing the names of the replica.
+567        int: Insert the separator "|" at the position given by separator_insertion.
+568        True (default): separator "|" is inserted after len(ensname), assuming that the
+569        ensemble name is a prefix to the replica name.
+570        None or False: No separator is inserted.
+571    """
+572
+573    if not fname.endswith('.xml') and not fname.endswith('.gz'):
+574        fname += '.xml'
+575    if gz:
+576        if not fname.endswith('.gz'):
+577            fname += '.gz'
+578        with gzip.open(fname, 'r') as fin:
+579            content = fin.read()
+580    else:
+581        if fname.endswith('.gz'):
+582            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
+583        with open(fname, 'r') as fin:
+584            content = fin.read()
+585
+586    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
 
@@ -1604,179 +1601,188 @@ None or False: No separator is inserted.
-
656def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
-657    """Generate the string for the export of a list of Obs or structures containing Obs
-658    to a .xml.gz file according to the Zeuthen dobs format.
-659
-660    Tags are not written or recovered automatically. The separator |is removed from the replica names.
-661
-662    Parameters
-663    ----------
-664    obsl : list
-665        List of Obs that will be exported.
-666        The Obs inside a structure do not have to be defined on the same set of configurations,
-667        but the storage requirement is increased, if this is not the case.
-668    name : str
-669        The name of the observable.
-670    spec : str
-671        Optional string that describes the contents of the file.
-672    origin : str
-673        Specify where the data has its origin.
-674    symbol : list
-675        A list of symbols that describe the observables to be written. May be empty.
-676    who : str
-677        Provide the name of the person that exports the data.
-678    enstags : dict
-679        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
-680        Otherwise, the ensemble name is used.
-681    """
-682    if enstags is None:
-683        enstags = {}
-684    od = {}
-685    r_names = []
-686    for o in obsl:
-687        r_names += [name for name in o.names if name.split('|')[0] in o.mc_names]
-688    r_names = sorted(set(r_names))
-689    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
-690    for tmpname in mc_names:
-691        if tmpname not in enstags:
-692            enstags[tmpname] = tmpname
-693    ne = len(set(mc_names))
-694    cov_names = []
-695    for o in obsl:
-696        cov_names += list(o.cov_names)
-697    cov_names = sorted(set(cov_names))
-698    nc = len(set(cov_names))
-699    od['OBSERVABLES'] = {}
-700    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
-701    if who is None:
-702        who = getpass.getuser()
-703    od['OBSERVABLES']['origin'] = {
-704        'who': who,
-705        'date': str(datetime.datetime.now())[:-7],
-706        'host': socket.gethostname(),
-707        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
-708    od['OBSERVABLES']['dobs'] = {}
-709    pd = od['OBSERVABLES']['dobs']
-710    pd['spec'] = spec
-711    pd['origin'] = origin
-712    pd['name'] = name
-713    pd['array'] = {}
-714    pd['array']['id'] = 'val'
-715    pd['array']['layout'] = '1 f%d' % (len(obsl))
-716    osymbol = ''
-717    if symbol:
-718        if not isinstance(symbol, list):
-719            raise Exception('Symbol has to be a list!')
-720        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
-721            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
-722        osymbol = symbol[0]
-723        for s in symbol[1:]:
-724            osymbol += ' %s' % s
-725        pd['array']['symbol'] = osymbol
-726
-727    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
-728    pd['ne'] = '%d' % (ne)
-729    pd['nc'] = '%d' % (nc)
-730    pd['edata'] = []
-731    for name in mc_names:
-732        ed = {}
-733        ed['enstag'] = enstags[name]
-734        onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)])
-735        nr = len(onames)
-736        ed['nr'] = nr
-737        ed[''] = []
-738
-739        for r in range(nr):
-740            ad = {}
-741            repname = onames[r]
-742            ad['id'] = repname.replace('|', '')
-743            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
-744            Nconf = len(idx)
-745            layout = '%d i f%d' % (Nconf, len(obsl))
-746            ad['layout'] = layout
-747            data = ''
-748            counters = [0 for o in obsl]
-749            for ci in idx:
-750                data += '%d ' % ci
-751                for oi in range(len(obsl)):
-752                    o = obsl[oi]
-753                    if repname in o.idl:
-754                        if counters[oi] < 0:
-755                            data += '0 '
-756                            continue
-757                        if o.idl[repname][counters[oi]] == ci:
-758                            num = o.deltas[repname][counters[oi]]
-759                            if num == 0:
-760                                data += '0 '
-761                            else:
-762                                data += '%1.16e ' % (num)
-763                            counters[oi] += 1
-764                            if counters[oi] >= len(o.idl[repname]):
-765                                counters[oi] = -1
-766                        else:
-767                            data += '0 '
-768                    else:
-769                        data += '0 '
-770                data += '\n'
-771            ad['#data'] = data
-772            ed[''].append(ad)
-773        pd['edata'].append(ed)
-774
-775        allcov = {}
-776        for o in obsl:
-777            for name in o.cov_names:
-778                if name in allcov:
-779                    if not np.array_equal(allcov[name], o.covobs[name].cov):
-780                        raise Exception('Inconsistent covariance matrices for %s!' % (name))
-781                else:
-782                    allcov[name] = o.covobs[name].cov
-783        pd['cdata'] = []
-784        for name in cov_names:
-785            cd = {}
-786            cd['id'] = name
-787
-788            covd = {'id': 'cov'}
-789            if allcov[name].shape == ():
-790                ncov = 1
-791                covd['layout'] = '1 1 f'
-792                covd['#data'] = '%1.14e' % (allcov[name])
-793            else:
-794                shape = allcov[name].shape
-795                assert (shape[0] == shape[1])
-796                ncov = shape[0]
-797                covd['layout'] = '%d %d f' % (ncov, ncov)
-798                ds = ''
-799                for i in range(ncov):
-800                    for j in range(ncov):
-801                        val = allcov[name][i][j]
-802                        if val == 0:
-803                            ds += '0 '
-804                        else:
-805                            ds += '%1.14e ' % (val)
-806                    ds += '\n'
-807                covd['#data'] = ds
-808
-809            gradd = {'id': 'grad'}
-810            gradd['layout'] = '%d f%d' % (ncov, len(obsl))
-811            ds = ''
-812            for i in range(ncov):
-813                for o in obsl:
-814                    if name in o.covobs:
-815                        val = o.covobs[name].grad[i]
-816                        if val != 0:
-817                            ds += '%1.14e ' % (val)
-818                        else:
-819                            ds += '0 '
-820                    else:
-821                        ds += '0 '
-822            gradd['#data'] = ds
-823            cd['array'] = [covd, gradd]
-824            pd['cdata'].append(cd)
-825
-826    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)
-827
-828    return rs
+            
648def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
+649    """Generate the string for the export of a list of Obs or structures containing Obs
+650    to a .xml.gz file according to the Zeuthen dobs format.
+651
+652    Tags are not written or recovered automatically. The separator |is removed from the replica names.
+653
+654    Parameters
+655    ----------
+656    obsl : list
+657        List of Obs that will be exported.
+658        The Obs inside a structure do not have to be defined on the same set of configurations,
+659        but the storage requirement is increased, if this is not the case.
+660    name : str
+661        The name of the observable.
+662    spec : str
+663        Optional string that describes the contents of the file.
+664    origin : str
+665        Specify where the data has its origin.
+666    symbol : list
+667        A list of symbols that describe the observables to be written. May be empty.
+668    who : str
+669        Provide the name of the person that exports the data.
+670    enstags : dict
+671        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
+672        Otherwise, the ensemble name is used.
+673    """
+674    if enstags is None:
+675        enstags = {}
+676    od = {}
+677    r_names = []
+678    for o in obsl:
+679        r_names += [name for name in o.names if name.split('|')[0] in o.mc_names]
+680    r_names = sorted(set(r_names))
+681    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
+682    for tmpname in mc_names:
+683        if tmpname not in enstags:
+684            enstags[tmpname] = tmpname
+685    ne = len(set(mc_names))
+686    cov_names = []
+687    for o in obsl:
+688        cov_names += list(o.cov_names)
+689    cov_names = sorted(set(cov_names))
+690    nc = len(set(cov_names))
+691    od['OBSERVABLES'] = {}
+692    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
+693    if who is None:
+694        who = getpass.getuser()
+695    od['OBSERVABLES']['origin'] = {
+696        'who': who,
+697        'date': str(datetime.datetime.now())[:-7],
+698        'host': socket.gethostname(),
+699        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
+700    od['OBSERVABLES']['dobs'] = {}
+701    pd = od['OBSERVABLES']['dobs']
+702    pd['spec'] = spec
+703    pd['origin'] = origin
+704    pd['name'] = name
+705    pd['array'] = {}
+706    pd['array']['id'] = 'val'
+707    pd['array']['layout'] = '1 f%d' % (len(obsl))
+708    osymbol = ''
+709    if symbol:
+710        if not isinstance(symbol, list):
+711            raise Exception('Symbol has to be a list!')
+712        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
+713            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
+714        osymbol = symbol[0]
+715        for s in symbol[1:]:
+716            osymbol += ' %s' % s
+717        pd['array']['symbol'] = osymbol
+718
+719    pd['array']['#values'] = ['  '.join(['%1.16e' % o.value for o in obsl])]
+720    pd['ne'] = '%d' % (ne)
+721    pd['nc'] = '%d' % (nc)
+722    pd['edata'] = []
+723    for name in mc_names:
+724        ed = {}
+725        ed['enstag'] = enstags[name]
+726        onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)])
+727        nr = len(onames)
+728        ed['nr'] = nr
+729        ed[''] = []
+730
+731        for r in range(nr):
+732            ad = {}
+733            repname = onames[r]
+734            ad['id'] = repname.replace('|', '')
+735            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
+736            Nconf = len(idx)
+737            layout = '%d i f%d' % (Nconf, len(obsl))
+738            ad['layout'] = layout
+739            data = ''
+740            counters = [0 for o in obsl]
+741            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
+742            for ci in idx:
+743                data += '%d ' % ci
+744                for oi in range(len(obsl)):
+745                    o = obsl[oi]
+746                    if repname in o.idl:
+747                        if counters[oi] < 0:
+748                            num = offsets[oi]
+749                            if num == 0:
+750                                data += '0 '
+751                            else:
+752                                data += '%1.16e ' % (num)
+753                            continue
+754                        if o.idl[repname][counters[oi]] == ci:
+755                            num = o.deltas[repname][counters[oi]] + offsets[oi]
+756                            if num == 0:
+757                                data += '0 '
+758                            else:
+759                                data += '%1.16e ' % (num)
+760                            counters[oi] += 1
+761                            if counters[oi] >= len(o.idl[repname]):
+762                                counters[oi] = -1
+763                        else:
+764                            num = offsets[oi]
+765                            if num == 0:
+766                                data += '0 '
+767                            else:
+768                                data += '%1.16e ' % (num)
+769                    else:
+770                        data += '0 '
+771                data += '\n'
+772            ad['#data'] = data
+773            ed[''].append(ad)
+774        pd['edata'].append(ed)
+775
+776        allcov = {}
+777        for o in obsl:
+778            for cname in o.cov_names:
+779                if cname in allcov:
+780                    if not np.array_equal(allcov[cname], o.covobs[cname].cov):
+781                        raise Exception('Inconsistent covariance matrices for %s!' % (cname))
+782                else:
+783                    allcov[cname] = o.covobs[cname].cov
+784        pd['cdata'] = []
+785        for cname in cov_names:
+786            cd = {}
+787            cd['id'] = cname
+788
+789            covd = {'id': 'cov'}
+790            if allcov[cname].shape == ():
+791                ncov = 1
+792                covd['layout'] = '1 1 f'
+793                covd['#data'] = '%1.14e' % (allcov[cname])
+794            else:
+795                shape = allcov[cname].shape
+796                assert (shape[0] == shape[1])
+797                ncov = shape[0]
+798                covd['layout'] = '%d %d f' % (ncov, ncov)
+799                ds = ''
+800                for i in range(ncov):
+801                    for j in range(ncov):
+802                        val = allcov[cname][i][j]
+803                        if val == 0:
+804                            ds += '0 '
+805                        else:
+806                            ds += '%1.14e ' % (val)
+807                    ds += '\n'
+808                covd['#data'] = ds
+809
+810            gradd = {'id': 'grad'}
+811            gradd['layout'] = '%d f%d' % (ncov, len(obsl))
+812            ds = ''
+813            for i in range(ncov):
+814                for o in obsl:
+815                    if cname in o.covobs:
+816                        val = o.covobs[cname].grad[i]
+817                        if val != 0:
+818                            ds += '%1.14e ' % (val)
+819                        else:
+820                            ds += '0 '
+821                    else:
+822                        ds += '0 '
+823            gradd['#data'] = ds
+824            cd['array'] = [covd, gradd]
+825            pd['cdata'].append(cd)
+826
+827    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)
+828
+829    return rs
 
@@ -1821,54 +1827,54 @@ Otherwise, the ensemble name is used.
-
831def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
-832    """Export a list of Obs or structures containing Obs to a .xml.gz file
-833    according to the Zeuthen dobs format.
-834
-835    Tags are not written or recovered automatically. The separator | is removed from the replica names.
-836
-837    Parameters
-838    ----------
-839    obsl : list
-840        List of Obs that will be exported.
-841        The Obs inside a structure do not have to be defined on the same set of configurations,
-842        but the storage requirement is increased, if this is not the case.
-843    fname : str
-844        Filename of the output file.
-845    name : str
-846        The name of the observable.
-847    spec : str
-848        Optional string that describes the contents of the file.
-849    origin : str
-850        Specify where the data has its origin.
-851    symbol : list
-852        A list of symbols that describe the observables to be written. May be empty.
-853    who : str
-854        Provide the name of the person that exports the data.
-855    enstags : dict
-856        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
-857        Otherwise, the ensemble name is used.
-858    gz : bool
-859        If True, the output is a gzipped XML. If False, the output is a XML file.
-860    """
-861    if enstags is None:
-862        enstags = {}
-863
-864    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)
-865
-866    if not fname.endswith('.xml') and not fname.endswith('.gz'):
-867        fname += '.xml'
-868
-869    if gz:
-870        if not fname.endswith('.gz'):
-871            fname += '.gz'
-872
-873        fp = gzip.open(fname, 'wb')
-874        fp.write(dobsstring.encode('utf-8'))
-875    else:
-876        fp = open(fname, 'w', encoding='utf-8')
-877        fp.write(dobsstring)
-878    fp.close()
+            
832def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
+833    """Export a list of Obs or structures containing Obs to a .xml.gz file
+834    according to the Zeuthen dobs format.
+835
+836    Tags are not written or recovered automatically. The separator | is removed from the replica names.
+837
+838    Parameters
+839    ----------
+840    obsl : list
+841        List of Obs that will be exported.
+842        The Obs inside a structure do not have to be defined on the same set of configurations,
+843        but the storage requirement is increased, if this is not the case.
+844    fname : str
+845        Filename of the output file.
+846    name : str
+847        The name of the observable.
+848    spec : str
+849        Optional string that describes the contents of the file.
+850    origin : str
+851        Specify where the data has its origin.
+852    symbol : list
+853        A list of symbols that describe the observables to be written. May be empty.
+854    who : str
+855        Provide the name of the person that exports the data.
+856    enstags : dict
+857        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
+858        Otherwise, the ensemble name is used.
+859    gz : bool
+860        If True, the output is a gzipped XML. If False, the output is a XML file.
+861    """
+862    if enstags is None:
+863        enstags = {}
+864
+865    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)
+866
+867    if not fname.endswith('.xml') and not fname.endswith('.gz'):
+868        fname += '.xml'
+869
+870    if gz:
+871        if not fname.endswith('.gz'):
+872            fname += '.gz'
+873
+874        fp = gzip.open(fname, 'wb')
+875        fp.write(dobsstring.encode('utf-8'))
+876    else:
+877        fp = open(fname, 'w', encoding='utf-8')
+878        fp.write(dobsstring)
+879    fp.close()