pyerrors.input.dobs
from collections import defaultdict
import gzip
import lxml.etree as et
import getpass
import socket
import datetime
import json
import warnings
import numpy as np
from ..obs import Obs
from ..obs import _merge_idx
from ..covobs import Covobs
from .. import version as pyerrorsversion


# Based on https://stackoverflow.com/a/10076823
def _etree_to_dict(t):
    """Convert an XML element (and, recursively, its children) to a python dict.

    Child elements that share a tag are collected in a list, attributes are
    stored under '@'-prefixed keys and text that accompanies children or
    attributes is stored under the key '#data'.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(_etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v
                     for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v)
                        for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#data'] = [text]
        else:
            d[t.tag] = text
    return d


def _timestamp():
    """Return the current local time as 'YYYY-MM-DD HH:MM:SS'.

    str(datetime.now())[:-7] is not used on purpose: str() omits the fractional
    part when the microsecond field is 0, so the slice would then cut off
    real characters of the time stamp.
    """
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _parse_xml(content):
    """Parse XML given as bytes or str and return the root element.

    lxml may refuse a str that carries an encoding declaration (ValueError);
    in that case the string is re-tried as UTF-8 encoded bytes, which is the
    encoding written by the export routines of this module.
    """
    try:
        return et.fromstring(content)
    except ValueError:
        if not isinstance(content, str):
            raise
        return et.fromstring(content.encode('utf-8'))


def _indent_xmlstring(s, space):
    """Indent the lines of the flat XML string s by their nesting depth.

    A line that starts with a closing tag is dedented before it is written;
    a line that starts with an opening tag and does not contain a closing tag
    increases the depth for the following lines.
    """
    out = []
    depth = 0
    for li in s.split('\n'):
        closing = li.startswith('</')
        if closing:
            depth -= 1
        # A negative depth yields no indentation, as range(depth) did before.
        out.append(space * depth + li + '\n')
        if li.startswith('<') and not closing:
            if '</' not in li:
                depth += 1
    return ''.join(out)


def _dict_to_xmlstring(d):
    """Serialize the nested dict d to an (unindented) XML string for pobs files.

    A key starting with '#' holds raw array data: everything collected so far
    on this level is wrapped in <array>...</array> together with the data and
    returned immediately. Strings are written inline, or on separate lines if
    longer than 100 characters. Lists are expanded element by element.

    Raises
    ------
    Exception
        If d or one of its values has a type that is not supported.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '</array>\n'
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '</%s>\n' % (k)
            elif isinstance(d[k], str):
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n</%s>\n' % (k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' </%s>\n' % (k)
            elif isinstance(d[k], list):
                for entry in d[k]:
                    iters += _dict_to_xmlstring(entry)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dict_to_xmlstring and indent the result using space."""
    return _indent_xmlstring(_dict_to_xmlstring(d), space)


def create_pobs_string(obsl, name, spec='', origin='', symbol=None, enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If obsl is empty, the Obs are not all defined on one common ensemble,
        or symbol / enstag are malformed.
    """
    if symbol is None:
        symbol = []
    if not obsl:
        raise Exception('obsl has to contain at least one Obs!')

    od = {}
    ename = obsl[0].e_names[0]
    names = list(obsl[0].deltas.keys())
    nr = len(names)
    onames = [rep.replace('|', '') for rep in names]
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')
    od['observables'] = {}
    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
    od['observables']['origin'] = {
        'who': getpass.getuser(),
        'date': _timestamp(),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['observables']['pobs'] = {}
    pd = od['observables']['pobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []
    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    # The first column of every data row is the configuration number.
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)
    for rep, oname in zip(names, onames):
        ad = {}
        ad['id'] = oname
        Nconf = len(obsl[0].deltas[rep])
        ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
        ad['symbol'] = osymbol
        parts = []
        for c in range(Nconf):
            parts.append('%d ' % obsl[0].idl[rep][c])
            for o in obsl:
                # pobs stores the full samples, i.e. fluctuation plus replica mean.
                num = o.deltas[rep][c] + o.r_values[rep]
                if num == 0:
                    parts.append('0 ')
                else:
                    parts.append('%1.16e ' % (num))
            parts.append('\n')
        ad['#data'] = ''.join(parts)
        pd['array'].append(ad)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
    return rs


def write_pobs(obsl, fname, name, spec='', origin='', symbol=None, enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    if symbol is None:
        symbol = []
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)


def _import_data(string):
    """Parse a whitespace separated block of numbers into a flat list."""
    # json does not accept an explicit leading '+', so it is stripped first.
    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")


def _check(condition):
    """Raise an Exception if an expected property of the XML file is not met."""
    if not condition:
        raise Exception("XML file format not supported")


class _NoTagInDataError(Exception):
    """Raised when tag is not in data"""
    def __init__(self, tag):
        self.tag = tag
        super().__init__('Tag %s not in data!' % (self.tag))


def _find_tag(dat, tag):
    """Return the index of the first child of dat with the given tag.

    Raises
    ------
    _NoTagInDataError
        If no child carries the tag.
    """
    for i in range(len(dat)):
        if dat[i].tag == tag:
            return i
    raise _NoTagInDataError(tag)


def _import_array(arr):
    """Import the content of an <array> element.

    The numbers are read from the text that follows the last header element
    (layout or symbol, whichever comes later).

    Returns
    -------
    list or numpy.ndarray
        For the layout 'N i fM' (replica data): [name, idx, mask, data], where
        idx holds the first column (configuration numbers) and data is a list of
        M arrays. For the layouts 'N fM' and 'N ... f': the data reshaped
        according to the layout.

    Raises
    ------
    Exception
        If the layout is not supported or inconsistent with the data.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    index = _find_tag(arr, 'layout')
    try:
        sindex = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        sindex = 0
    if sindex > index:
        tmp = _import_data(arr[sindex].tail)
    else:
        tmp = _import_data(arr[index].tail)

    li = arr[index].text.strip()
    m = li.split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        # Each row consists of the configuration number followed by na values.
        mask = list(range(na))
        _dat = [np.array(tmp[1 + a:: na + 1]) for a in range(na)]
        _check(len(tmp[0:: na + 1]) == nc)
        return [name, tmp[0:: na + 1], mask, _dat]
    elif m[1][0] == 'f' and len(m) < 3:
        sh = (int(m[0]), int(m[1].lstrip('f')))
        return np.reshape(tmp, sh)
    elif any(['f' in s for s in m]):
        # The entries in front of the bare 'f' give the shape; if there is no
        # bare 'f', all but the last entry are used (as the index loop did).
        si = next((j for j, s in enumerate(m) if s == 'f'), len(m) - 1)
        sh = [int(m[i]) for i in range(si)]
        return np.reshape(tmp, sh)
    else:
        raise Exception("XML file format not supported: unknown layout '%s' in array '%s'" % (li, name))


def _import_rdata(rd):
    """Import the array of one replica and return (deltas, name, idx)."""
    name, idx, mask, deltas = _import_array(rd)
    return deltas, name, idx


def _import_cdata(cd):
    """Import a <cdata> element and return (name, covariance, gradient)."""
    _check(cd[0].tag == "id")
    _check(cd[1][0].text.strip() == "cov")
    cov = _import_array(cd[1])
    grad = _import_array(cd[2])
    return cd[0].text.strip(), cov, grad


def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of Obs or, if full_output is True, a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description' and 'obsdata'.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read bytes so that the parser applies the encoding declared in the
        # file instead of the locale encoding of a text-mode open().
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Validate the layout before the entries are accessed by position.
    _check(pobs[4].tag == "nr")
    _check(pobs[5].tag == 'array')

    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    if pobs[5][1].tag == 'symbol':
        descriptiond['symbol'] = pobs[5][1].text.strip()

    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['obsdata'] = res
        return retd
    else:
        return res


# Reading (and writing) dobs is not yet working properly:
# we have to loop over root[2:] because each entry is a dobs
# But maybe this is just a problem with Ben's implementation

# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of Obs or, if full_output is True, a dict with the keys 'program',
        'version', 'who', 'date', 'host', 'description', 'enstags' and 'obsdata'.
    """

    root = _parse_xml(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = ''
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # The bool cases have to be tested with 'is' before the int
                # case, because bool is a subclass of int.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Remove duplicates; sorting makes the order of the replica deterministic.
    names = sorted(set(names))

    # dobs stores fluctuations, the Obs constructor expects the full samples.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            if noempty:
                # A replica whose samples all equal the mean does not contribute.
                h = np.unique(deltad[name][i])
                if len(h) == 1 and np.all(h == mean[i]):
                    continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols are stored as one whitespace separated string with one
        # entry per observable. They are only used as tags if the number of
        # entries matches the number of observables.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)
    if full_output:
        retd = {}
        tool = file_origin.get('tool', None)
        if tool:
            program = tool['name'] + ' ' + tool['version']
        else:
            program = ''
        retd['program'] = program
        retd['version'] = version
        retd['who'] = file_origin['who']
        retd['date'] = file_origin['date']
        retd['host'] = file_origin['host']
        retd['description'] = descriptiond
        retd['enstags'] = enstags
        retd['obsdata'] = res
        return retd
    else:
        return res


def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion: str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    # The content is read exactly once, as bytes, so that the XML parser
    # applies the encoding declared in the file.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)


def _dobsdict_to_xmlstring(d):
    """Serialize the nested dict d to an (unindented) XML string for dobs files.

    A key starting with '#value' holds the raw values of the enclosing element:
    they are appended to everything collected so far on this level and returned.
    Any other key starting with '#' holds array data and is wrapped, together
    with everything collected so far, in <array>...</array>. Lists stored under
    'edata' or 'cdata' get one enclosing element per entry.

    Raises
    ------
    Exception
        If d or one of its values has a type that is not supported.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#value'):
                iters += ''.join(d[k])
                return iters + '\n'
            elif k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '</array>\n'
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '</%s>\n' % (k)
            elif isinstance(d[k], str):
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n</%s>\n' % (k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' </%s>\n' % (k)
            elif isinstance(d[k], list):
                tmps = ''
                if k in ['edata', 'cdata']:
                    for entry in d[k]:
                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(entry) + '</%s>\n' % (k)
                else:
                    for entry in d[k]:
                        tmps += _dobsdict_to_xmlstring(entry)
                iters += tmps
            elif isinstance(d[k], (int, float)):
                iters += '<%s> ' % (k) + str(d[k]) + ' </%s>\n' % (k)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dobsdict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dobsdict_to_xmlstring and indent the result using space."""
    return _indent_xmlstring(_dobsdict_to_xmlstring(d), space)


def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=None, who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, including the XML declaration.
    """
    if symbol is None:
        symbol = []
    # Work on a copy: missing ensembles are filled in below and this must not
    # leak into the dict of the caller.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        'date': _timestamp(),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        pd['array']['symbol'] = ' '.join('%s' % s for s in symbol)

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens_name in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens_name]
        onames = sorted([n for n in r_names if (n.startswith(ens_name + '|') or n == ens_name)])
        nr = len(onames)
        ed['nr'] = nr
        # The replica arrays are written without an enclosing element.
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            parts = []
            # counters[oi] is the position of the next unwritten configuration
            # of observable oi, or -1 once all of them have been written.
            counters = [0 for _ in obsl]
            for ci in idx:
                parts.append('%d ' % ci)
                for oi, o in enumerate(obsl):
                    if repname in o.idl:
                        if counters[oi] < 0:
                            parts.append('0 ')
                            continue
                        if o.idl[repname][counters[oi]] == ci:
                            num = o.deltas[repname][counters[oi]]
                            if num == 0:
                                parts.append('0 ')
                            else:
                                parts.append('%1.16e ' % (num))
                            counters[oi] += 1
                            if counters[oi] >= len(o.idl[repname]):
                                counters[oi] = -1
                        else:
                            # Observable is not defined on this configuration.
                            parts.append('0 ')
                    else:
                        parts.append('0 ')
                parts.append('\n')
            ad['#data'] = ''.join(parts)
            ed[''].append(ad)
        pd['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        ds += '0 '
                    else:
                        ds += '%1.14e ' % (val)
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        ds += '%1.14e ' % (val)
                    else:
                        ds += '0 '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs


def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=None, who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.
    """
    if symbol is None:
        symbol = []

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
def create_pobs_string(obsl, name, spec='', origin='', symbol=None, enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    str
        The XML document, including the XML declaration.

    Raises
    ------
    Exception
        If obsl is empty, the Obs are not all defined on one common ensemble,
        or symbol / enstag are malformed.
    """
    if symbol is None:
        symbol = []
    if not obsl:
        raise Exception('obsl has to contain at least one Obs!')

    od = {}
    ename = obsl[0].e_names[0]
    names = list(obsl[0].deltas.keys())
    nr = len(names)
    onames = [rep.replace('|', '') for rep in names]
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')
    od['observables'] = {}
    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
    od['observables']['origin'] = {
        'who': getpass.getuser(),
        # str(datetime.now())[:-7] would cut off real characters whenever the
        # microsecond field is 0, because str() then omits the fraction.
        'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['observables']['pobs'] = {}
    pd = od['observables']['pobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []
    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    # The first column of every data row is the configuration number.
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)
    for rep, oname in zip(names, onames):
        ad = {}
        ad['id'] = oname
        Nconf = len(obsl[0].deltas[rep])
        ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
        ad['symbol'] = osymbol
        parts = []
        for c in range(Nconf):
            parts.append('%d ' % obsl[0].idl[rep][c])
            for o in obsl:
                # pobs stores the full samples, i.e. fluctuation plus replica mean.
                num = o.deltas[rep][c] + o.r_values[rep]
                if num == 0:
                    parts.append('0 ')
                else:
                    parts.append('%1.16e ' % (num))
            parts.append('\n')
        ad['#data'] = ''.join(parts)
        pd['array'].append(ad)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
    return rs
Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
def write_pobs(obsl, fname, name, spec='', origin='', symbol=None, enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.
    """
    # An explicit list is passed on, since create_pobs_string validates the type.
    if symbol is None:
        symbol = []
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    # The context managers close the file even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
- gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    list or dict
        The list of imported Obs. If full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description' and 'obsdata'
        (the list of Obs) is returned instead.

    Raises
    ------
    Exception
        If separator_insertion is neither None, int nor str.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes: lxml refuses unicode strings that carry an XML
        # encoding declaration, and text mode would decode with the platform
        # default encoding instead of the one declared in the file.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    # Children 0-3 of pobs are descriptive entries, child 4 is 'nr';
    # everything from index 5 on is handed to _import_rdata as replica data.
    deltas = []
    names = []
    idl = []
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['obsdata'] = res
    return retd
Import a list of Obs from an xml.gz file in the Zeuthen pobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the file contains a symbol entry, its whitespace-separated tokens are
    assigned as tags of the returned Obs (the token 'None' becomes None).

    Parameters
    ----------
    content : str
        XML string containing the data. Bytes are accepted as well.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file, possibly with vanishing entries.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        The list of imported Obs. If full_output is True, a dict with the keys
        'program', 'version', 'who', 'date', 'host', 'description', 'enstags'
        and 'obsdata' (the list of Obs) is returned instead.

    Raises
    ------
    Exception
        If separator_insertion has an unsupported type.
    """

    # lxml rejects unicode strings that carry an XML encoding declaration
    # (which the dobs writer emits), so hand it bytes instead.
    if isinstance(content, str):
        content = content.encode('utf-8')
    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")
    nc = int(dobs[5].text.strip())

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(dobs)):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # Booleans must be handled before the int branch because
                # bool is a subclass of int: False would otherwise insert
                # the separator at position 0.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    # Deduplicate; sorting keeps the processing order deterministic.
    names = sorted(set(names))

    # The file stores fluctuations; shift them by the mean of each observable.
    for name in names:
        for i in range(len(deltad[name])):
            deltad[name][i] = np.array(deltad[name][i]) + mean[i]

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            # A replica whose samples all equal the mean does not contribute.
            if len(h) == 1 and np.all(h == mean[i]) and noempty:
                continue
            deltas.append(deltad[name][i])
            obs_names.append(name)
            idl.append(idld[name])
        res.append(Obs(deltas, obs_names, idl=idl))
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        if noempty:
            for name in cnames:
                if np.all(new_covobs[name].grad == 0):
                    del new_covobs[name]
            cnames_loc = list(new_covobs.keys())
        else:
            cnames_loc = cnames
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbol entry is a single string; the per-observable symbols are
        # its whitespace-separated tokens, not its individual characters.
        tags = symbol.split()
        if len(tags) == len(res):
            for o, tag in zip(res, tags):
                o.tag = None if tag == 'None' else tag
        else:
            warnings.warn("Number of symbols (%d) does not match the number of observables (%d), tags are not set."
                          % (len(tags), len(res)), UserWarning)
    if not noempty:
        _check(len(res[0].covobs.keys()) == nc)

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['enstags'] = enstags
    retd['obsdata'] = res
    return retd
Import a list of Obs from a string in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- content (str): XML string containing the data
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file, possibly with vanishing entries.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is read and handed to import_dobs_string, which performs
    the actual parsing.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    noempty : bool
        If True, ensembles with no contribution to the Obs are not included.
        If False, ensembles are included as written in the file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    list or dict
        Whatever import_dobs_string returns for the given full_output setting.
    """

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'
    # The content is read exactly once and as raw bytes: lxml rejects unicode
    # strings that carry an XML encoding declaration, and the handle is
    # closed by the context manager in both branches.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'rb') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- noempty (bool): If True, ensembles with no contribution to the Obs are not included. If False, ensembles are included as written in the file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    str
        The XML document, starting with the XML declaration.

    Raises
    ------
    Exception
        If symbol is non-empty but not a list of len(obsl) entries, or if
        the covariance matrices of a covobs differ between the Obs.
    """
    # Work on a private copy: the missing entries are filled in below and the
    # caller's dict (or a shared default of a caller) must not be altered.
    enstags = {} if enstags is None else dict(enstags)
    od = {}

    # Replica names of all Monte Carlo ensembles that appear in any of the Obs.
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for ename in mc_names:
        enstags.setdefault(ename, ename)
    ne = len(mc_names)

    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)

    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    if who is None:
        who = getpass.getuser()
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # str(datetime) ends in '.ffffff'; the slice drops the microseconds.
        'date': str(datetime.datetime.now())[:-7],
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
    pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        ed['nr'] = len(onames)
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            ad['layout'] = '%d i f%d' % (len(idx), len(obsl))
            # One row per configuration of the merged index list. counters[oi]
            # is the position of the next unwritten sample of Obs oi on this
            # replica; -1 marks that all of its samples have been written.
            rows = []
            counters = [0 for o in obsl]
            for ci in idx:
                row = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    if repname not in o.idl or counters[oi] < 0:
                        row.append('0 ')
                        continue
                    if o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]]
                        if num == 0:
                            row.append('0 ')
                        else:
                            row.append('%1.16e ' % (num))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # Obs oi has no sample for this configuration.
                        row.append('0 ')
                row.append('\n')
                rows.append(''.join(row))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        pd['edata'].append(ed)

    # Collect the covariance of every covobs and make sure all Obs agree on it.
    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            entries = []
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    if val == 0:
                        entries.append('0 ')
                    else:
                        entries.append('%1.14e ' % (val))
                entries.append('\n')
            covd['#data'] = ''.join(entries)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        entries = []
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    val = o.covobs[cname].grad[i]
                    if val != 0:
                        entries.append('%1.14e ' % (val))
                    else:
                        entries.append('0 ')
                else:
                    entries.append('0 ')
        gradd['#data'] = ''.join(entries)
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends in
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.
    """
    # Hand over a copy: create_dobs_string fills in missing ensemble names,
    # which must neither leak into the caller's dict nor persist between calls.
    if enstags is not None:
        enstags = dict(enstags)

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    # Context managers guarantee that the handle is closed even if writing fails.
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
- gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.