diff --git a/docs/pyerrors/input/dobs.html b/docs/pyerrors/input/dobs.html index bc8e50c3..5f72dc97 100644 --- a/docs/pyerrors/input/dobs.html +++ b/docs/pyerrors/input/dobs.html @@ -470,502 +470,503 @@ 379 return res 380 381 -382# Reading (and writing) dobs is not yet working properly: -383# we have to loop over root[2:] because each entry is a dobs -384# But maybe this is just a problem with Ben's implementation +382# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py +383def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True): +384 """Import a list of Obs from a string in the Zeuthen dobs format. 385 -386# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py -387def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True): -388 """Import a list of Obs from a string in the Zeuthen dobs format. -389 -390 Tags are not written or recovered automatically. -391 -392 Parameters -393 ---------- -394 content : str -395 XML string containing the data -396 noemtpy : bool -397 If True, ensembles with no contribution to the Obs are not included. -398 If False, ensembles are included as written in the file, possibly with vanishing entries. -399 full_output : bool -400 If True, a dict containing auxiliary information and the data is returned. -401 If False, only the data is returned as list. -402 separatior_insertion: str, int or bool -403 str: replace all occurences of "separator_insertion" within the replica names -404 by "|%s" % (separator_insertion) when constructing the names of the replica. -405 int: Insert the separator "|" at the position given by separator_insertion. -406 True (default): separator "|" is inserted after len(ensname), assuming that the -407 ensemble name is a prefix to the replica name. -408 None or False: No separator is inserted. 
-409 """ -410 -411 root = et.fromstring(content) +386 Tags are not written or recovered automatically. +387 +388 Parameters +389 ---------- +390 content : str +391 XML string containing the data +392 noemtpy : bool +393 If True, ensembles with no contribution to the Obs are not included. +394 If False, ensembles are included as written in the file, possibly with vanishing entries. +395 full_output : bool +396 If True, a dict containing auxiliary information and the data is returned. +397 If False, only the data is returned as list. +398 separatior_insertion: str, int or bool +399 str: replace all occurences of "separator_insertion" within the replica names +400 by "|%s" % (separator_insertion) when constructing the names of the replica. +401 int: Insert the separator "|" at the position given by separator_insertion. +402 True (default): separator "|" is inserted after len(ensname), assuming that the +403 ensemble name is a prefix to the replica name. +404 None or False: No separator is inserted. 
+405 """ +406 +407 root = et.fromstring(content) +408 +409 _check(root.tag == 'OBSERVABLES') +410 _check(root[0].tag == 'SCHEMA') +411 version = root[0][1].text.strip() 412 -413 _check(root.tag == 'OBSERVABLES') -414 _check(root[0].tag == 'SCHEMA') -415 version = root[0][1].text.strip() -416 -417 _check(root[1].tag == 'origin') -418 file_origin = _etree_to_dict(root[1])['origin'] +413 _check(root[1].tag == 'origin') +414 file_origin = _etree_to_dict(root[1])['origin'] +415 +416 _check(root[2].tag == 'dobs') +417 +418 dobs = root[2] 419 -420 _check(root[2].tag == 'dobs') -421 -422 dobs = root[2] +420 descriptiond = {} +421 for i in range(3): +422 descriptiond[dobs[i].tag] = dobs[i].text.strip() 423 -424 descriptiond = {} -425 for i in range(3): -426 descriptiond[dobs[i].tag] = dobs[i].text.strip() -427 -428 _check(dobs[3].tag == 'array') -429 -430 symbol = [] -431 if dobs[3][1].tag == 'symbol': -432 symbol = dobs[3][1].text.strip() -433 descriptiond['symbol'] = symbol -434 mean = _import_array(dobs[3])[0] -435 -436 _check(dobs[4].tag == "ne") -437 ne = int(dobs[4].text.strip()) -438 _check(dobs[5].tag == "nc") -439 nc = int(dobs[5].text.strip()) -440 -441 idld = {} -442 deltad = {} -443 covd = {} -444 gradd = {} -445 names = [] -446 e_names = [] -447 enstags = {} -448 for k in range(6, len(list(dobs))): -449 if dobs[k].tag == "edata": -450 _check(dobs[k][0].tag == "enstag") -451 ename = dobs[k][0].text.strip() -452 e_names.append(ename) -453 _check(dobs[k][1].tag == "nr") -454 R = int(dobs[k][1].text.strip()) -455 for i in range(2, 2 + R): -456 deltas, rname, idx = _import_rdata(dobs[k][i]) -457 if separator_insertion is None or False: -458 pass -459 elif separator_insertion is True: -460 if rname.startswith(ename): -461 rname = rname[:len(ename)] + '|' + rname[len(ename):] -462 elif isinstance(separator_insertion, int): -463 rname = rname[:separator_insertion] + '|' + rname[separator_insertion:] -464 elif isinstance(separator_insertion, str): -465 rname = 
rname.replace(separator_insertion, "|%s" % (separator_insertion)) +424 _check(dobs[3].tag == 'array') +425 +426 symbol = [] +427 if dobs[3][1].tag == 'symbol': +428 symbol = dobs[3][1].text.strip() +429 descriptiond['symbol'] = symbol +430 mean = _import_array(dobs[3])[0] +431 +432 _check(dobs[4].tag == "ne") +433 ne = int(dobs[4].text.strip()) +434 _check(dobs[5].tag == "nc") +435 nc = int(dobs[5].text.strip()) +436 +437 idld = {} +438 deltad = {} +439 covd = {} +440 gradd = {} +441 names = [] +442 e_names = [] +443 enstags = {} +444 for k in range(6, len(list(dobs))): +445 if dobs[k].tag == "edata": +446 _check(dobs[k][0].tag == "enstag") +447 ename = dobs[k][0].text.strip() +448 e_names.append(ename) +449 _check(dobs[k][1].tag == "nr") +450 R = int(dobs[k][1].text.strip()) +451 for i in range(2, 2 + R): +452 deltas, rname, idx = _import_rdata(dobs[k][i]) +453 if separator_insertion is None or False: +454 pass +455 elif separator_insertion is True: +456 if rname.startswith(ename): +457 rname = rname[:len(ename)] + '|' + rname[len(ename):] +458 elif isinstance(separator_insertion, int): +459 rname = rname[:separator_insertion] + '|' + rname[separator_insertion:] +460 elif isinstance(separator_insertion, str): +461 rname = rname.replace(separator_insertion, "|%s" % (separator_insertion)) +462 else: +463 raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion)) +464 if '|' in rname: +465 new_ename = rname[:rname.index('|')] 466 else: -467 raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion)) -468 if '|' in rname: -469 new_ename = rname[:rname.index('|')] -470 else: -471 new_ename = ename -472 enstags[new_ename] = ename -473 idld[rname] = idx -474 deltad[rname] = deltas -475 names.append(rname) -476 elif dobs[k].tag == "cdata": -477 cname, cov, grad = _import_cdata(dobs[k]) -478 covd[cname] = cov -479 if grad.shape[1] == 1: -480 gradd[cname] = [grad for i in range(len(mean))] -481 else: 
-482 gradd[cname] = grad.T -483 else: -484 _check(False) -485 names = list(set(names)) +467 new_ename = ename +468 enstags[new_ename] = ename +469 idld[rname] = idx +470 deltad[rname] = deltas +471 names.append(rname) +472 elif dobs[k].tag == "cdata": +473 cname, cov, grad = _import_cdata(dobs[k]) +474 covd[cname] = cov +475 if grad.shape[1] == 1: +476 gradd[cname] = [grad for i in range(len(mean))] +477 else: +478 gradd[cname] = grad.T +479 else: +480 _check(False) +481 names = list(set(names)) +482 +483 for name in names: +484 for i in range(len(deltad[name])): +485 deltad[name][i] = np.array(deltad[name][i]) + mean[i] 486 -487 for name in names: -488 for i in range(len(deltad[name])): -489 deltad[name][i] = np.array(deltad[name][i]) + mean[i] -490 -491 res = [] -492 for i in range(len(mean)): -493 deltas = [] -494 idl = [] -495 obs_names = [] -496 for name in names: -497 h = np.unique(deltad[name][i]) -498 if len(h) == 1 and np.all(h == mean[i]) and noempty: -499 continue -500 deltas.append(deltad[name][i]) -501 obs_names.append(name) -502 idl.append(idld[name]) -503 res.append(Obs(deltas, obs_names, idl=idl)) -504 print(mean, 'vs', res) -505 _check(len(e_names) == ne) -506 -507 cnames = list(covd.keys()) -508 for i in range(len(res)): -509 new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames} -510 if noempty: -511 for name in cnames: -512 if np.all(new_covobs[name].grad == 0): -513 del new_covobs[name] -514 cnames_loc = list(new_covobs.keys()) -515 else: -516 cnames_loc = cnames -517 for name in cnames_loc: -518 res[i].names.append(name) -519 res[i].shape[name] = 1 -520 res[i].idl[name] = [] -521 res[i]._covobs = new_covobs -522 -523 if symbol: -524 for i in range(len(res)): -525 res[i].tag = symbol[i] -526 if res[i].tag == 'None': -527 res[i].tag = None -528 if not noempty: -529 _check(len(res[0].covobs.keys()) == nc) -530 if full_output: -531 retd = {} -532 tool = file_origin.get('tool', None) -533 if tool: -534 program = 
tool['name'] + ' ' + tool['version'] -535 else: -536 program = '' -537 retd['program'] = program -538 retd['version'] = version -539 retd['who'] = file_origin['who'] -540 retd['date'] = file_origin['date'] -541 retd['host'] = file_origin['host'] -542 retd['description'] = descriptiond -543 retd['enstags'] = enstags -544 retd['obsdata'] = res -545 return retd -546 else: -547 return res +487 res = [] +488 for i in range(len(mean)): +489 deltas = [] +490 idl = [] +491 obs_names = [] +492 for name in names: +493 h = np.unique(deltad[name][i]) +494 if len(h) == 1 and np.all(h == mean[i]) and noempty: +495 continue +496 deltas.append(deltad[name][i]) +497 obs_names.append(name) +498 idl.append(idld[name]) +499 res.append(Obs(deltas, obs_names, idl=idl)) +500 res[-1]._value = mean[i] +501 _check(len(e_names) == ne) +502 +503 cnames = list(covd.keys()) +504 for i in range(len(res)): +505 new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames} +506 if noempty: +507 for name in cnames: +508 if np.all(new_covobs[name].grad == 0): +509 del new_covobs[name] +510 cnames_loc = list(new_covobs.keys()) +511 else: +512 cnames_loc = cnames +513 for name in cnames_loc: +514 res[i].names.append(name) +515 res[i].shape[name] = 1 +516 res[i].idl[name] = [] +517 res[i]._covobs = new_covobs +518 +519 if symbol: +520 for i in range(len(res)): +521 res[i].tag = symbol[i] +522 if res[i].tag == 'None': +523 res[i].tag = None +524 if not noempty: +525 _check(len(res[0].covobs.keys()) == nc) +526 if full_output: +527 retd = {} +528 tool = file_origin.get('tool', None) +529 if tool: +530 program = tool['name'] + ' ' + tool['version'] +531 else: +532 program = '' +533 retd['program'] = program +534 retd['version'] = version +535 retd['who'] = file_origin['who'] +536 retd['date'] = file_origin['date'] +537 retd['host'] = file_origin['host'] +538 retd['description'] = descriptiond +539 retd['enstags'] = enstags +540 retd['obsdata'] = res +541 return retd +542 else: 
+543 return res +544 +545 +546def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True): +547 """Import a list of Obs from an xml.gz file in the Zeuthen dobs format. 548 -549 -550def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True): -551 """Import a list of Obs from an xml.gz file in the Zeuthen dobs format. -552 -553 Tags are not written or recovered automatically. -554 -555 Parameters -556 ---------- -557 fname : str -558 Filename of the input file. -559 noemtpy : bool -560 If True, ensembles with no contribution to the Obs are not included. -561 If False, ensembles are included as written in the file. -562 full_output : bool -563 If True, a dict containing auxiliary information and the data is returned. -564 If False, only the data is returned as list. -565 gz : bool -566 If True, assumes that data is gzipped. If False, assumes XML file. -567 separatior_insertion: str, int or bool -568 str: replace all occurences of "separator_insertion" within the replica names -569 by "|%s" % (separator_insertion) when constructing the names of the replica. -570 int: Insert the separator "|" at the position given by separator_insertion. -571 True (default): separator "|" is inserted after len(ensname), assuming that the -572 ensemble name is a prefix to the replica name. -573 None or False: No separator is inserted. -574 """ -575 -576 if not fname.endswith('.xml') and not fname.endswith('.gz'): -577 fname += '.xml' -578 if gz: -579 if not fname.endswith('.gz'): -580 fname += '.gz' -581 with gzip.open(fname, 'r') as fin: -582 content = fin.read().decode('utf-8') -583 else: -584 if fname.endswith('.gz'): -585 warnings.warn("Trying to read from %s without unzipping!" 
% fname, UserWarning) -586 with open(fname, 'r', encoding='utf-8') as fin: -587 content = fin.read() -588 -589 # open and read gzipped xml file -590 infile = gzip.open(fname) -591 content = infile.read() -592 -593 return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion) -594 -595 -596def _dobsdict_to_xmlstring(d): -597 if isinstance(d, dict): -598 iters = '' -599 for k in d: -600 if k.startswith('#value'): -601 for li in d[k]: -602 iters += li -603 return iters + '\n' -604 elif k.startswith('#'): -605 for li in d[k]: -606 iters += li -607 iters = '<array>\n' + iters + '<%sarray>\n' % ('/') -608 return iters -609 if isinstance(d[k], dict): -610 iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k) -611 elif isinstance(d[k], str): -612 if len(d[k]) > 100: -613 iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k) -614 else: -615 iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k) -616 elif isinstance(d[k], list): -617 tmps = '' -618 if k in ['edata', 'cdata']: -619 for i in range(len(d[k])): -620 tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k) -621 else: -622 for i in range(len(d[k])): -623 tmps += _dobsdict_to_xmlstring(d[k][i]) -624 iters += tmps -625 elif isinstance(d[k], (int, float)): -626 iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k) -627 elif not d[k]: -628 return '\n' -629 else: -630 raise Exception('Type', type(d[k]), 'not supported in export!') -631 else: -632 raise Exception('Type', type(d), 'not supported in export!') -633 return iters -634 -635 -636def _dobsdict_to_xmlstring_spaces(d, space=' '): -637 s = _dobsdict_to_xmlstring(d) -638 o = '' -639 c = 0 -640 cm = False -641 for li in s.split('\n'): -642 if li.startswith('<%s' % ('/')): -643 c -= 1 -644 cm = True -645 for i in range(c): -646 o += space -647 o += li + '\n' -648 if li.startswith('<') and not cm: -649 if not '<%s' % ('/') in li: -650 c += 1 -651 cm = False -652 return o -653 
-654 -655def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None): -656 """Generate the string for the export of a list of Obs or structures containing Obs -657 to a .xml.gz file according to the Zeuthen dobs format. -658 -659 Tags are not written or recovered automatically. The separator |is removed from the replica names. -660 -661 Parameters -662 ---------- -663 obsl : list -664 List of Obs that will be exported. -665 The Obs inside a structure do not have to be defined on the same set of configurations, -666 but the storage requirement is increased, if this is not the case. -667 name : str -668 The name of the observable. -669 spec : str -670 Optional string that describes the contents of the file. -671 origin : str -672 Specify where the data has its origin. -673 symbol : list -674 A list of symbols that describe the observables to be written. May be empty. -675 who : str -676 Provide the name of the person that exports the data. -677 enstags : dict -678 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} -679 Otherwise, the ensemble name is used. 
-680 """ -681 if enstags is None: -682 enstags = {} -683 od = {} -684 r_names = [] -685 for o in obsl: -686 r_names += [name for name in o.names if name.split('|')[0] in o.mc_names] -687 r_names = sorted(set(r_names)) -688 mc_names = sorted(set([n.split('|')[0] for n in r_names])) -689 for tmpname in mc_names: -690 if tmpname not in enstags: -691 enstags[tmpname] = tmpname -692 ne = len(set(mc_names)) -693 cov_names = [] -694 for o in obsl: -695 cov_names += list(o.cov_names) -696 cov_names = sorted(set(cov_names)) -697 nc = len(set(cov_names)) -698 od['OBSERVABLES'] = {} -699 od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'} -700 if who is None: -701 who = getpass.getuser() -702 od['OBSERVABLES']['origin'] = { -703 'who': who, -704 'date': str(datetime.datetime.now())[:-7], -705 'host': socket.gethostname(), -706 'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}} -707 od['OBSERVABLES']['dobs'] = {} -708 pd = od['OBSERVABLES']['dobs'] -709 pd['spec'] = spec -710 pd['origin'] = origin -711 pd['name'] = name -712 pd['array'] = {} -713 pd['array']['id'] = 'val' -714 pd['array']['layout'] = '1 f%d' % (len(obsl)) -715 osymbol = '' -716 if symbol: -717 if not isinstance(symbol, list): -718 raise Exception('Symbol has to be a list!') -719 if not (len(symbol) == 0 or len(symbol) == len(obsl)): -720 raise Exception('Symbol has to be a list of lenght 0 or %d!' 
% (len(obsl))) -721 osymbol = symbol[0] -722 for s in symbol[1:]: -723 osymbol += ' %s' % s -724 pd['array']['symbol'] = osymbol -725 -726 pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])] -727 pd['ne'] = '%d' % (ne) -728 pd['nc'] = '%d' % (nc) -729 pd['edata'] = [] -730 for name in mc_names: -731 ed = {} -732 ed['enstag'] = enstags[name] -733 onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)]) -734 nr = len(onames) -735 ed['nr'] = nr -736 ed[''] = [] -737 -738 for r in range(nr): -739 ad = {} -740 repname = onames[r] -741 ad['id'] = repname.replace('|', '') -742 idx = _merge_idx([o.idl.get(repname, []) for o in obsl]) -743 Nconf = len(idx) -744 layout = '%d i f%d' % (Nconf, len(obsl)) -745 ad['layout'] = layout -746 data = '' -747 counters = [0 for o in obsl] -748 for ci in idx: -749 data += '%d ' % ci -750 for oi in range(len(obsl)): -751 o = obsl[oi] -752 if repname in o.idl: -753 if counters[oi] < 0: -754 data += '0 ' -755 continue -756 if o.idl[repname][counters[oi]] == ci: -757 num = o.deltas[repname][counters[oi]] -758 if num == 0: -759 data += '0 ' -760 else: -761 data += '%1.16e ' % (num) -762 counters[oi] += 1 -763 if counters[oi] >= len(o.idl[repname]): -764 counters[oi] = -1 -765 else: -766 data += '0 ' -767 else: -768 data += '0 ' -769 data += '\n' -770 ad['#data'] = data -771 ed[''].append(ad) -772 pd['edata'].append(ed) -773 -774 allcov = {} -775 for o in obsl: -776 for name in o.cov_names: -777 if name in allcov: -778 if not np.array_equal(allcov[name], o.covobs[name].cov): -779 raise Exception('Inconsistent covariance matrices for %s!' 
% (name)) -780 else: -781 allcov[name] = o.covobs[name].cov -782 pd['cdata'] = [] -783 for name in cov_names: -784 cd = {} -785 cd['id'] = name -786 -787 covd = {'id': 'cov'} -788 if allcov[name].shape == (): -789 ncov = 1 -790 covd['layout'] = '1 1 f' -791 covd['#data'] = '%1.14e' % (allcov[name]) -792 else: -793 shape = allcov[name].shape -794 assert (shape[0] == shape[1]) -795 ncov = shape[0] -796 covd['layout'] = '%d %d f' % (ncov, ncov) -797 ds = '' -798 for i in range(ncov): -799 for j in range(ncov): -800 val = allcov[name][i][j] -801 if val == 0: -802 ds += '0 ' -803 else: -804 ds += '%1.14e ' % (val) -805 ds += '\n' -806 covd['#data'] = ds -807 -808 gradd = {'id': 'grad'} -809 gradd['layout'] = '%d f%d' % (ncov, len(obsl)) -810 ds = '' -811 for i in range(ncov): -812 for o in obsl: -813 if name in o.covobs: -814 val = o.covobs[name].grad[i] -815 if val != 0: -816 ds += '%1.14e ' % (val) -817 else: -818 ds += '0 ' -819 else: -820 ds += '0 ' -821 gradd['#data'] = ds -822 cd['array'] = [covd, gradd] -823 pd['cdata'].append(cd) -824 -825 rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od) -826 -827 return rs -828 +549 Tags are not written or recovered automatically. +550 +551 Parameters +552 ---------- +553 fname : str +554 Filename of the input file. +555 noemtpy : bool +556 If True, ensembles with no contribution to the Obs are not included. +557 If False, ensembles are included as written in the file. +558 full_output : bool +559 If True, a dict containing auxiliary information and the data is returned. +560 If False, only the data is returned as list. +561 gz : bool +562 If True, assumes that data is gzipped. If False, assumes XML file. +563 separatior_insertion: str, int or bool +564 str: replace all occurences of "separator_insertion" within the replica names +565 by "|%s" % (separator_insertion) when constructing the names of the replica. +566 int: Insert the separator "|" at the position given by separator_insertion. 
+567 True (default): separator "|" is inserted after len(ensname), assuming that the +568 ensemble name is a prefix to the replica name. +569 None or False: No separator is inserted. +570 """ +571 +572 if not fname.endswith('.xml') and not fname.endswith('.gz'): +573 fname += '.xml' +574 if gz: +575 if not fname.endswith('.gz'): +576 fname += '.gz' +577 with gzip.open(fname, 'r') as fin: +578 content = fin.read() +579 else: +580 if fname.endswith('.gz'): +581 warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning) +582 with open(fname, 'r') as fin: +583 content = fin.read() +584 +585 return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion) +586 +587 +588def _dobsdict_to_xmlstring(d): +589 if isinstance(d, dict): +590 iters = '' +591 for k in d: +592 if k.startswith('#value'): +593 for li in d[k]: +594 iters += li +595 return iters + '\n' +596 elif k.startswith('#'): +597 for li in d[k]: +598 iters += li +599 iters = '<array>\n' + iters + '<%sarray>\n' % ('/') +600 return iters +601 if isinstance(d[k], dict): +602 iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k) +603 elif isinstance(d[k], str): +604 if len(d[k]) > 100: +605 iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k) +606 else: +607 iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k) +608 elif isinstance(d[k], list): +609 tmps = '' +610 if k in ['edata', 'cdata']: +611 for i in range(len(d[k])): +612 tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k][i]) + '</%s>\n' % (k) +613 else: +614 for i in range(len(d[k])): +615 tmps += _dobsdict_to_xmlstring(d[k][i]) +616 iters += tmps +617 elif isinstance(d[k], (int, float)): +618 iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k) +619 elif not d[k]: +620 return '\n' +621 else: +622 raise Exception('Type', type(d[k]), 'not supported in export!') +623 else: +624 raise Exception('Type', type(d), 'not supported in export!') +625 return iters +626 +627 
+628def _dobsdict_to_xmlstring_spaces(d, space=' '): +629 s = _dobsdict_to_xmlstring(d) +630 o = '' +631 c = 0 +632 cm = False +633 for li in s.split('\n'): +634 if li.startswith('<%s' % ('/')): +635 c -= 1 +636 cm = True +637 for i in range(c): +638 o += space +639 o += li + '\n' +640 if li.startswith('<') and not cm: +641 if not '<%s' % ('/') in li: +642 c += 1 +643 cm = False +644 return o +645 +646 +647def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None): +648 """Generate the string for the export of a list of Obs or structures containing Obs +649 to a .xml.gz file according to the Zeuthen dobs format. +650 +651 Tags are not written or recovered automatically. The separator |is removed from the replica names. +652 +653 Parameters +654 ---------- +655 obsl : list +656 List of Obs that will be exported. +657 The Obs inside a structure do not have to be defined on the same set of configurations, +658 but the storage requirement is increased, if this is not the case. +659 name : str +660 The name of the observable. +661 spec : str +662 Optional string that describes the contents of the file. +663 origin : str +664 Specify where the data has its origin. +665 symbol : list +666 A list of symbols that describe the observables to be written. May be empty. +667 who : str +668 Provide the name of the person that exports the data. +669 enstags : dict +670 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} +671 Otherwise, the ensemble name is used. 
+672 """ +673 if enstags is None: +674 enstags = {} +675 od = {} +676 r_names = [] +677 for o in obsl: +678 r_names += [name for name in o.names if name.split('|')[0] in o.mc_names] +679 r_names = sorted(set(r_names)) +680 mc_names = sorted(set([n.split('|')[0] for n in r_names])) +681 for tmpname in mc_names: +682 if tmpname not in enstags: +683 enstags[tmpname] = tmpname +684 ne = len(set(mc_names)) +685 cov_names = [] +686 for o in obsl: +687 cov_names += list(o.cov_names) +688 cov_names = sorted(set(cov_names)) +689 nc = len(set(cov_names)) +690 od['OBSERVABLES'] = {} +691 od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'} +692 if who is None: +693 who = getpass.getuser() +694 od['OBSERVABLES']['origin'] = { +695 'who': who, +696 'date': str(datetime.datetime.now())[:-7], +697 'host': socket.gethostname(), +698 'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}} +699 od['OBSERVABLES']['dobs'] = {} +700 pd = od['OBSERVABLES']['dobs'] +701 pd['spec'] = spec +702 pd['origin'] = origin +703 pd['name'] = name +704 pd['array'] = {} +705 pd['array']['id'] = 'val' +706 pd['array']['layout'] = '1 f%d' % (len(obsl)) +707 osymbol = '' +708 if symbol: +709 if not isinstance(symbol, list): +710 raise Exception('Symbol has to be a list!') +711 if not (len(symbol) == 0 or len(symbol) == len(obsl)): +712 raise Exception('Symbol has to be a list of lenght 0 or %d!' 
% (len(obsl))) +713 osymbol = symbol[0] +714 for s in symbol[1:]: +715 osymbol += ' %s' % s +716 pd['array']['symbol'] = osymbol +717 +718 pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])] +719 pd['ne'] = '%d' % (ne) +720 pd['nc'] = '%d' % (nc) +721 pd['edata'] = [] +722 for name in mc_names: +723 ed = {} +724 ed['enstag'] = enstags[name] +725 onames = sorted([n for n in r_names if (n.startswith(name + '|') or n == name)]) +726 nr = len(onames) +727 ed['nr'] = nr +728 ed[''] = [] +729 +730 for r in range(nr): +731 ad = {} +732 repname = onames[r] +733 ad['id'] = repname.replace('|', '') +734 idx = _merge_idx([o.idl.get(repname, []) for o in obsl]) +735 Nconf = len(idx) +736 layout = '%d i f%d' % (Nconf, len(obsl)) +737 ad['layout'] = layout +738 data = '' +739 counters = [0 for o in obsl] +740 offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl] +741 for ci in idx: +742 data += '%d ' % ci +743 for oi in range(len(obsl)): +744 o = obsl[oi] +745 if repname in o.idl: +746 if counters[oi] < 0: +747 num = offsets[oi] +748 if num == 0: +749 data += '0 ' +750 else: +751 data += '%1.16e ' % (num) +752 continue +753 if o.idl[repname][counters[oi]] == ci: +754 num = o.deltas[repname][counters[oi]] + offsets[oi] +755 if num == 0: +756 data += '0 ' +757 else: +758 data += '%1.16e ' % (num) +759 counters[oi] += 1 +760 if counters[oi] >= len(o.idl[repname]): +761 counters[oi] = -1 +762 else: +763 num = offsets[oi] +764 if num == 0: +765 data += '0 ' +766 else: +767 data += '%1.16e ' % (num) +768 else: +769 data += '0 ' +770 data += '\n' +771 ad['#data'] = data +772 ed[''].append(ad) +773 pd['edata'].append(ed) +774 +775 allcov = {} +776 for o in obsl: +777 for cname in o.cov_names: +778 if cname in allcov: +779 if not np.array_equal(allcov[cname], o.covobs[cname].cov): +780 raise Exception('Inconsistent covariance matrices for %s!' 
% (cname)) +781 else: +782 allcov[cname] = o.covobs[cname].cov +783 pd['cdata'] = [] +784 for cname in cov_names: +785 cd = {} +786 cd['id'] = cname +787 +788 covd = {'id': 'cov'} +789 if allcov[cname].shape == (): +790 ncov = 1 +791 covd['layout'] = '1 1 f' +792 covd['#data'] = '%1.14e' % (allcov[cname]) +793 else: +794 shape = allcov[cname].shape +795 assert (shape[0] == shape[1]) +796 ncov = shape[0] +797 covd['layout'] = '%d %d f' % (ncov, ncov) +798 ds = '' +799 for i in range(ncov): +800 for j in range(ncov): +801 val = allcov[cname][i][j] +802 if val == 0: +803 ds += '0 ' +804 else: +805 ds += '%1.14e ' % (val) +806 ds += '\n' +807 covd['#data'] = ds +808 +809 gradd = {'id': 'grad'} +810 gradd['layout'] = '%d f%d' % (ncov, len(obsl)) +811 ds = '' +812 for i in range(ncov): +813 for o in obsl: +814 if cname in o.covobs: +815 val = o.covobs[cname].grad[i] +816 if val != 0: +817 ds += '%1.14e ' % (val) +818 else: +819 ds += '0 ' +820 else: +821 ds += '0 ' +822 gradd['#data'] = ds +823 cd['array'] = [covd, gradd] +824 pd['cdata'].append(cd) +825 +826 rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od) +827 +828 return rs 829 -830def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True): -831 """Export a list of Obs or structures containing Obs to a .xml.gz file -832 according to the Zeuthen dobs format. -833 -834 Tags are not written or recovered automatically. The separator | is removed from the replica names. -835 -836 Parameters -837 ---------- -838 obsl : list -839 List of Obs that will be exported. -840 The Obs inside a structure do not have to be defined on the same set of configurations, -841 but the storage requirement is increased, if this is not the case. -842 fname : str -843 Filename of the output file. -844 name : str -845 The name of the observable. -846 spec : str -847 Optional string that describes the contents of the file. 
-848 origin : str -849 Specify where the data has its origin. -850 symbol : list -851 A list of symbols that describe the observables to be written. May be empty. -852 who : str -853 Provide the name of the person that exports the data. -854 enstags : dict -855 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} -856 Otherwise, the ensemble name is used. -857 gz : bool -858 If True, the output is a gzipped XML. If False, the output is a XML file. -859 """ -860 if enstags is None: -861 enstags = {} -862 -863 dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags) -864 -865 if not fname.endswith('.xml') and not fname.endswith('.gz'): -866 fname += '.xml' -867 -868 if gz: -869 if not fname.endswith('.gz'): -870 fname += '.gz' -871 -872 fp = gzip.open(fname, 'wb') -873 fp.write(dobsstring.encode('utf-8')) -874 else: -875 fp = open(fname, 'w', encoding='utf-8') -876 fp.write(dobsstring) -877 fp.close() +830 +831def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True): +832 """Export a list of Obs or structures containing Obs to a .xml.gz file +833 according to the Zeuthen dobs format. +834 +835 Tags are not written or recovered automatically. The separator | is removed from the replica names. +836 +837 Parameters +838 ---------- +839 obsl : list +840 List of Obs that will be exported. +841 The Obs inside a structure do not have to be defined on the same set of configurations, +842 but the storage requirement is increased, if this is not the case. +843 fname : str +844 Filename of the output file. +845 name : str +846 The name of the observable. +847 spec : str +848 Optional string that describes the contents of the file. +849 origin : str +850 Specify where the data has its origin. +851 symbol : list +852 A list of symbols that describe the observables to be written. May be empty. +853 who : str +854 Provide the name of the person that exports the data. 
+855 enstags : dict +856 Provide alternative enstag for ensembles in the form enstags = {ename: enstag} +857 Otherwise, the ensemble name is used. +858 gz : bool +859 If True, the output is a gzipped XML. If False, the output is a XML file. +860 """ +861 if enstags is None: +862 enstags = {} +863 +864 dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags) +865 +866 if not fname.endswith('.xml') and not fname.endswith('.gz'): +867 fname += '.xml' +868 +869 if gz: +870 if not fname.endswith('.gz'): +871 fname += '.gz' +872 +873 fp = gzip.open(fname, 'wb') +874 fp.write(dobsstring.encode('utf-8')) +875 else: +876 fp = open(fname, 'w', encoding='utf-8') +877 fp.write(dobsstring) +878 fp.close() @@ -1315,167 +1316,167 @@ None (default): Replica names remain unchanged. -
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of imported Obs. If full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags'
        and 'obsdata' (the list of Obs) is returned instead.

    Raises
    ------
    Exception
        If separator_insertion is neither None, a bool, an int nor a str.
    """

    root = et.fromstring(content)

    # The layout of the file is positional: SCHEMA, origin, dobs.
    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    # The first three children of dobs are stored verbatim as description.
    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(dobs)):
        entry = dobs[k]
        if entry.tag == "edata":
            _check(entry[0].tag == "enstag")
            ename = entry[0].text.strip()
            e_names.append(ename)
            _check(entry[1].tag == "nr")
            R = int(entry[1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(entry[i])
                # The bool cases have to be tested by identity and before the
                # int branch, because bool is a subclass of int: a plain
                # isinstance check would treat False as position 0.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                # Remember which enstag of the file belongs to the ensemble
                # name that results from the replica name.
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif entry.tag == "cdata":
            cname, cov, grad = _import_cdata(entry)
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations; Obs is constructed from samples, so the
    # mean is added back to every replica.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
        # Use the mean value that is stored in the file.
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            # cnames is a separate list, so deleting from new_covobs is safe here.
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # NOTE(review): symbol is the stripped text of the symbol element (a str),
        # so symbol[i] is a single character - confirm that this is intended.
        for i in range(len(res)):
            res[i].tag = symbol[i]
            if res[i].tag == 'None':
                res[i].tag = None
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. The suffix '.xml' is appended if the name
        ends neither with '.xml' nor with '.gz'; if gz is True, '.gz' is
        appended as well when it is missing.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The return value of import_dobs_string for the content of the file.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, as in the gzipped branch, so that the XML parser
        # decodes the data according to the encoding declared in the file
        # instead of the platform dependent default text encoding.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML string, starting with the XML declaration.

    Raises
    ------
    Exception
        If symbol is not a list of matching length or if two Obs carry
        different covariance matrices under the same name.
    """
    def _fmt(num, pattern):
        # Exact zeros are abbreviated to '0' in the output.
        return '0' if num == 0 else pattern % (num)

    # Work on a copy: the defaults filled in below must not leak into the dict of the caller.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        enstags.setdefault(tmpname, tmpname)
    ne = len(mc_names)
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # [:-7] cuts off the microseconds of the time stamp
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = ' '.join(str(s) for s in symbol)
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        ed['nr'] = len(onames)
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            ad['layout'] = '%d i f%d' % (len(idx), len(obsl))
            # counters[oi] is the position of the next configuration of Obs oi
            # that has not been written yet, -1 once all have been written.
            counters = [0 for o in obsl]
            # The deltas are written relative to the value of the Obs, so the
            # difference between replica mean and value is added.
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            rows = []
            for ci in idx:
                tokens = ['%d' % ci]
                for oi, o in enumerate(obsl):
                    if repname not in o.idl:
                        tokens.append('0')
                        continue
                    # Configurations on which the Obs is not defined only carry the offset.
                    num = offsets[oi]
                    pos = counters[oi]
                    if pos >= 0 and o.idl[repname][pos] == ci:
                        num = o.deltas[repname][pos] + offsets[oi]
                        pos += 1
                        counters[oi] = pos if pos < len(o.idl[repname]) else -1
                    tokens.append(_fmt(num, '%1.16e'))
                # every entry, including the last one, is followed by a blank
                rows.append(' '.join(tokens) + ' \n')
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            covd['#data'] = ''.join(
                ''.join('%s ' % _fmt(allcov[cname][i][j], '%1.14e') for j in range(ncov)) + '\n'
                for i in range(ncov))

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        # Obs that do not depend on this covobs get a vanishing gradient.
        gradd['#data'] = ''.join(
            '%s ' % (_fmt(o.covobs[cname].grad[i], '%1.14e') if cname in o.covobs else '0')
            for i in range(ncov) for o in obsl)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. The suffix '.xml' is appended if the name
        ends neither with '.xml' nor with '.gz'; if gz is True, '.gz' is
        appended as well when it is missing.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)