pyerrors.input.dobs
from collections import defaultdict
import gzip
import lxml.etree as et
import getpass
import socket
import datetime
import json
import warnings
import numpy as np
from ..obs import Obs
from ..obs import _merge_idx
from ..covobs import Covobs
from .. import version as pyerrorsversion


# Based on https://stackoverflow.com/a/10076823
def _etree_to_dict(t):
    """Convert an XML element (and, recursively, its children) to a python dict.

    Attributes are stored under keys prefixed with '@'. Text of an element
    that also has children or attributes is stored under the key '#data'.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        dd = defaultdict(list)
        for dc in map(_etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        # A tag that occurs once maps to its value, repeated tags map to a list.
        d = {t.tag: {k: v[0] if len(v) == 1 else v
                     for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v)
                        for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            if text:
                d[t.tag]['#data'] = [text]
        else:
            d[t.tag] = text
    return d


def _num_to_str(num, fmt='%1.16e'):
    """Format a number for export; an exact zero is written as the short token '0'."""
    if num == 0:
        return '0'
    return fmt % (num)


def _origin_dict(who=None):
    """Build the 'origin' header (user, date, host, tool) shared by pobs and dobs files."""
    if who is None:
        who = getpass.getuser()
    return {
        'who': who,
        # Drop the microseconds explicitly. Slicing str(now())[:-7] removes
        # real digits in the rare case that microsecond == 0, because str()
        # omits the fractional part then.
        'date': str(datetime.datetime.now().replace(microsecond=0)),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}


def _indent_xmlstring(s, space):
    """Indent the lines of the XML string s by nesting depth, using `space` per level."""
    out = []
    depth = 0
    for li in s.split('\n'):
        closing = li.startswith('</')
        if closing:
            depth -= 1
        out.append(space * depth + li + '\n')
        # Only a line that opens a tag without closing it on the same line
        # increases the depth of the following lines.
        if li.startswith('<') and not closing and '</' not in li:
            depth += 1
    return ''.join(out)


def _write_xml_file(xml_string, fname, gz):
    """Write xml_string to fname, appending '.xml' / '.gz' to the name where missing."""
    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        # The context manager closes the file even if the write fails.
        with gzip.open(fname, 'wb') as fp:
            fp.write(xml_string.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(xml_string)


def _read_xml_file(fname, gz):
    """Return the raw bytes of the (optionally gzipped) XML file fname.

    The name is completed with '.xml' / '.gz' in the same way as on export.
    """
    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            return fin.read()

    if fname.endswith('.gz'):
        warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
    # Read bytes: lxml refuses str input that carries an encoding declaration,
    # and the files written by this module always start with one.
    with open(fname, 'rb') as fin:
        return fin.read()


def _full_output_dict(file_origin, version, descriptiond, res, enstags=None):
    """Assemble the dict returned by the import routines for full_output=True."""
    retd = {}
    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    if enstags is not None:
        retd['enstags'] = enstags
    retd['obsdata'] = res
    return retd


def _dict_to_xmlstring(d):
    """Serialize the nested dict d (pobs layout) to an unindented XML string.

    A key starting with '#' holds the raw data of an array: everything
    collected so far plus the data is wrapped in <array> tags and returned.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                for entry in d[k]:
                    iters += _dict_to_xmlstring(entry)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dict_to_xmlstring and indent the result by nesting depth."""
    return _indent_xmlstring(_dict_to_xmlstring(d), space)


def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    xml_str : str
        XML formatted string of the input data

    Raises
    ------
    Exception
        If obsl is empty, if the Obs are not all defined on the same single
        ensemble with the same replica and numbers of configurations, or if
        symbol or enstag have an invalid type or length.
    """
    if len(obsl) == 0:
        raise Exception('obsl has to contain at least one Obs!')
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
    ename = obsl[0].e_names[0]
    names = list(obsl[0].deltas.keys())
    nr = len(names)
    for o in obsl:
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')
        # The configuration numbers of obsl[0] are written for all Obs, so
        # every Obs needs the same replica with the same number of entries.
        for rname in names:
            if rname not in o.deltas or len(o.deltas[rname]) != len(obsl[0].deltas[rname]):
                raise Exception('Incompatible obses in list')
    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))

    od = {}
    od['observables'] = {}
    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
    od['observables']['origin'] = _origin_dict()
    od['observables']['pobs'] = {}
    pd = od['observables']['pobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    if enstag:
        if not isinstance(enstag, str):
            raise Exception('enstag has to be a string!')
        pd['enstag'] = enstag
    else:
        pd['enstag'] = ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)
    for rname in names:
        ad = {}
        ad['id'] = rname.replace('|', '')
        Nconf = len(obsl[0].deltas[rname])
        ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
        ad['symbol'] = osymbol
        lines = []
        for c in range(Nconf):
            fields = ['%d' % obsl[0].idl[rname][c]]
            fields += [_num_to_str(o.deltas[rname][c] + o.r_values[rname]) for o in obsl]
            lines.append(' '.join(fields) + ' \n')
        ad['#data'] = ''.join(lines)
        pd['array'].append(ad)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
    return rs


def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)
    _write_xml_file(pobsstring, fname, gz)


def _import_data(string):
    """Parse whitespace separated numbers into a flat list (explicit '+' signs are dropped)."""
    return json.loads("[" + ",".join(string.replace(' +', ' ').split()) + "]")


def _check(condition):
    """Raise an Exception if the file does not have the expected structure."""
    if not condition:
        raise Exception("XML file format not supported")


class _NoTagInDataError(Exception):
    """Raised when tag is not in data"""
    def __init__(self, tag):
        self.tag = tag
        super().__init__('Tag %s not in data!' % (self.tag))


def _find_tag(dat, tag):
    """Return the index of the first child of dat with the given tag."""
    for i in range(len(dat)):
        if dat[i].tag == tag:
            return i
    raise _NoTagInDataError(tag)


def _import_array(arr):
    """Import the content of an <array> element.

    For the layout 'N i fM' (replica data) a list
    [name, configuration numbers, mask, list of M data arrays] is returned.
    For all other supported layouts the data is returned as a reshaped
    numpy array.
    """
    name = arr[_find_tag(arr, 'id')].text.strip()
    index = _find_tag(arr, 'layout')
    try:
        sindex = _find_tag(arr, 'symbol')
    except _NoTagInDataError:
        sindex = 0
    # The numbers are the tail text of whichever of <layout> and <symbol>
    # comes last.
    if sindex > index:
        tmp = _import_data(arr[sindex].tail)
    else:
        tmp = _import_data(arr[index].tail)

    li = arr[index].text.strip()
    m = li.split()
    if m[1] == "i" and m[2][0] == "f":
        nc = int(m[0])
        na = int(m[2].lstrip('f'))
        _dat = []
        mask = []
        # Each row is "<cfg> <value_0> ... <value_{na-1}>": de-interleave.
        for a in range(na):
            mask += [a]
            _dat += [np.array(tmp[1 + a:: na + 1])]
        _check(len(tmp[0:: na + 1]) == nc)
        return [name, tmp[0:: na + 1], mask, _dat]
    elif m[1][0] == 'f' and len(m) < 3:
        sh = (int(m[0]), int(m[1].lstrip('f')))
        return np.reshape(tmp, sh)
    elif any(['f' in s for s in m]):
        for si in range(len(m)):
            if m[si] == 'f':
                break
        sh = [int(m[i]) for i in range(si)]
        return np.reshape(tmp, sh)
    else:
        print(name, m)
        _check(False)


def _import_rdata(rd):
    """Import the data of one replica, returned as (deltas, name, configuration numbers)."""
    name, idx, mask, deltas = _import_array(rd)
    return deltas, name, idx


def _import_cdata(cd):
    """Import a <cdata> element, returned as (name, covariance, gradient)."""
    _check(cd[0].tag == "id")
    _check(cd[1][0].text.strip() == "cov")
    cov = _import_array(cd[1])
    grad = _import_array(cd[2])
    return cd[0].text.strip(), cov, grad


def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """
    content = _read_xml_file(fname, gz)

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # The first five children are spec, origin, name, enstag and nr; the
    # replica arrays follow.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if full_output:
        return _full_output_dict(file_origin, version, descriptiond, res)
    else:
        return res


# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    If the file contains symbols, one per observable, they are assigned
    to the tags of the imported Obs.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """
    # lxml does not accept str input that carries an encoding declaration,
    # which is what create_dobs_string produces. Parse bytes instead.
    if isinstance(content, str):
        content = content.encode('utf-8')

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested by identity before the int branch:
                # bool is a subclass of int, so False would otherwise insert
                # the separator at position 0.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # The file stores fluctuations around the mean; an exact zero marks a
    # configuration on which the observable is not defined.
    for name in names:
        for i in range(len(deltad[name])):
            tmp = np.zeros_like(deltad[name][i])
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    tmp[j] = deltad[name][i][j] + mean[i]
            deltad[name][i] = tmp

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]):
                continue
            repdeltas = []
            repidl = []
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    repdeltas.append(deltad[name][i][j])
                    repidl.append(idld[name][j])
            if len(repdeltas) > 0:
                obs_names.append(name)
                deltas.append(repdeltas)
                idl.append(repidl)

        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        for name in cnames:
            if np.all(new_covobs[name].grad == 0):
                del new_covobs[name]
        cnames_loc = list(new_covobs.keys())
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols are stored as one whitespace separated string. Indexing
        # that string directly would assign single characters as tags.
        tags = symbol.split()
        if len(tags) == len(res):
            for i in range(len(res)):
                res[i].tag = tags[i]
                if res[i].tag == 'None':
                    res[i].tag = None
    if full_output:
        return _full_output_dict(file_origin, version, descriptiond, res, enstags=enstags)
    else:
        return res


def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    The file content is passed to import_dobs_string.

    Parameters
    ----------
    fname : str
        Filename of the input file.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """
    content = _read_xml_file(fname, gz)
    return import_dobs_string(content, full_output, separator_insertion=separator_insertion)


def _dobsdict_to_xmlstring(d):
    """Serialize the nested dict d (dobs layout) to an unindented XML string.

    A key starting with '#value' holds raw values that are appended without
    a wrapping tag; any other key starting with '#' holds array data that is
    wrapped in <array> tags together with everything collected so far.
    """
    if isinstance(d, dict):
        iters = ''
        for k in d:
            if k.startswith('#value'):
                iters += ''.join(d[k])
                return iters + '\n'
            elif k.startswith('#'):
                iters += ''.join(d[k])
                iters = '<array>\n' + iters + '<%sarray>\n' % ('/')
                return iters
            if isinstance(d[k], dict):
                iters += '<%s>\n' % (k) + _dobsdict_to_xmlstring(d[k]) + '<%s%s>\n' % ('/', k)
            elif isinstance(d[k], str):
                if len(d[k]) > 100:
                    iters += '<%s>\n ' % (k) + d[k] + ' \n<%s%s>\n' % ('/', k)
                else:
                    iters += '<%s> ' % (k) + d[k] + ' <%s%s>\n' % ('/', k)
            elif isinstance(d[k], list):
                tmps = ''
                if k in ['edata', 'cdata']:
                    # Every entry of these lists gets its own enclosing tag.
                    for entry in d[k]:
                        tmps += '<%s>\n' % (k) + _dobsdict_to_xmlstring(entry) + '</%s>\n' % (k)
                else:
                    for entry in d[k]:
                        tmps += _dobsdict_to_xmlstring(entry)
                iters += tmps
            elif isinstance(d[k], (int, float)):
                iters += '<%s> ' % (k) + str(d[k]) + ' <%s%s>\n' % ('/', k)
            elif not d[k]:
                return '\n'
            else:
                raise Exception('Type', type(d[k]), 'not supported in export!')
    else:
        raise Exception('Type', type(d), 'not supported in export!')
    return iters


def _dobsdict_to_xmlstring_spaces(d, space=' '):
    """Serialize d with _dobsdict_to_xmlstring and indent the result by nesting depth."""
    return _indent_xmlstring(_dobsdict_to_xmlstring(d), space)


def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data
    """
    # Work on a copy: the missing ensembles are filled in below and the
    # dict of the caller must not be changed as a side effect.
    enstags = {} if enstags is None else dict(enstags)
    od = {}
    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        if tmpname not in enstags:
            enstags[tmpname] = tmpname
    ne = len(set(mc_names))
    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(set(cov_names))
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    od['OBSERVABLES']['origin'] = _origin_dict(who)
    od['OBSERVABLES']['dobs'] = {}
    pd = od['OBSERVABLES']['dobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['array'] = {}
    pd['array']['id'] = 'val'
    pd['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        osymbol = symbol[0]
        for s in symbol[1:]:
            osymbol += ' %s' % s
        pd['array']['symbol'] = osymbol

    pd['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    pd['ne'] = '%d' % (ne)
    pd['nc'] = '%d' % (nc)
    pd['edata'] = []
    for ens in mc_names:
        ed = {}
        ed['enstag'] = enstags[ens]
        onames = sorted([n for n in r_names if (n.startswith(ens + '|') or n == ens)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            # counters[oi] is the position of the next unwritten configuration
            # of Obs oi on this replica, or -1 once all of them are written.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            lines = []
            for ci in idx:
                fields = ['%d' % ci]
                for oi, o in enumerate(obsl):
                    if repname in o.idl and counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        fields.append(_num_to_str(o.deltas[repname][counters[oi]] + offsets[oi]))
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    else:
                        # The Obs is not defined on this configuration.
                        fields.append('0')
                lines.append(' '.join(fields) + ' \n')
            ad['#data'] = ''.join(lines)
            ed[''].append(ad)
        pd['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    pd['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            ds = ''
            for i in range(ncov):
                for j in range(ncov):
                    ds += _num_to_str(allcov[cname][i][j], '%1.14e') + ' '
                ds += '\n'
            covd['#data'] = ds

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        ds = ''
        for i in range(ncov):
            for o in obsl:
                if cname in o.covobs:
                    ds += _num_to_str(o.covobs[cname].grad[i].item(), '%1.14e') + ' '
                else:
                    ds += '0 '
        gradd['#data'] = ds
        cd['array'] = [covd, gradd]
        pd['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs


def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is a XML file.

    Returns
    -------
    None
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)
    _write_xml_file(dobsstring, fname, gz)
def create_pobs_string(obsl, name, spec='', origin='', symbol=[], enstag=None):
    """Export a list of Obs or structures containing Obs to an xml string
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.

    Returns
    -------
    xml_str : str
        XML formatted string of the input data

    Raises
    ------
    Exception
        If obsl is empty, if the Obs are not all defined on the same single
        ensemble with the same replica and numbers of configurations, or if
        symbol or enstag have an invalid type or length.
    """
    # Validate everything up front, so that incompatible input is reported
    # with a clear message instead of a KeyError/IndexError during the export.
    if len(obsl) == 0:
        raise Exception('obsl has to contain at least one Obs!')
    for o in obsl:
        if len(o.e_names) != 1:
            raise Exception('You try to export dobs to obs!')
    ename = obsl[0].e_names[0]
    names = list(obsl[0].deltas.keys())
    nr = len(names)
    for o in obsl:
        if o.e_names[0] != ename:
            raise Exception('You try to export dobs to obs!')
        if len(o.deltas.keys()) != nr:
            raise Exception('Incompatible obses in list')
        # The configuration numbers of obsl[0] are written for all Obs, so
        # every Obs needs the same replica with the same number of entries.
        for rname in names:
            if rname not in o.deltas or len(o.deltas[rname]) != len(obsl[0].deltas[rname]):
                raise Exception('Incompatible obses in list')
    if not isinstance(symbol, list):
        raise Exception('Symbol has to be a list!')
    if not (len(symbol) == 0 or len(symbol) == len(obsl)):
        raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
    if enstag and not isinstance(enstag, str):
        raise Exception('enstag has to be a string!')

    od = {}
    od['observables'] = {}
    od['observables']['schema'] = {'name': 'lattobs', 'version': '1.0'}
    od['observables']['origin'] = {
        'who': getpass.getuser(),
        # Drop the microseconds explicitly. Slicing str(now())[:-7] removes
        # real digits in the rare case that microsecond == 0, because str()
        # omits the fractional part then.
        'date': str(datetime.datetime.now().replace(microsecond=0)),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['observables']['pobs'] = {}
    pd = od['observables']['pobs']
    pd['spec'] = spec
    pd['origin'] = origin
    pd['name'] = name
    pd['enstag'] = enstag if enstag else ename
    pd['nr'] = '%d' % (nr)
    pd['array'] = []
    osymbol = 'cfg' + ''.join(' %s' % s for s in symbol)
    for rname in names:
        ad = {}
        ad['id'] = rname.replace('|', '')
        Nconf = len(obsl[0].deltas[rname])
        ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
        ad['symbol'] = osymbol
        lines = []
        for c in range(Nconf):
            fields = ['%d' % obsl[0].idl[rname][c]]
            for o in obsl:
                num = o.deltas[rname][c] + o.r_values[rname]
                # An exact zero is written as the short token '0'.
                fields.append('0' if num == 0 else '%1.16e' % (num))
            lines.append(' '.join(fields) + ' \n')
        # Join once instead of growing the string entry by entry.
        ad['#data'] = ''.join(lines)
        pd['array'].append(ad)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dict_to_xmlstring_spaces(od)
    return rs
Export a list of Obs or structures containing Obs to an xml string according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
Returns
- xml_str (str): XML formatted string of the input data
def write_pobs(obsl, fname, name, spec='', origin='', symbol=[], enstag=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen pobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure have to be defined on the same ensemble.
    fname : str
        Filename of the output file.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    enstag : str
        Enstag that is written to pobs. If None, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped xml. If False, the output is an xml file.

    Returns
    -------
    None
    """
    pobsstring = create_pobs_string(obsl, name, spec, origin, symbol, enstag)

    if not fname.endswith(('.xml', '.gz')):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        # The context manager closes the file even if the write fails.
        with gzip.open(fname, 'wb') as fp:
            fp.write(pobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(pobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen pobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure have to be defined on the same ensemble.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- enstag (str): Enstag that is written to pobs. If None, the ensemble name is used.
- gz (bool): If True, the output is a gzipped xml. If False, the output is an xml file.
Returns
- None
def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):
    """Import a list of Obs from an xml.gz file in the Zeuthen pobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str or int
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        None (default): Replica names remain unchanged.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data. The keys are 'program', 'version', 'who',
        'date', 'host', 'description' and 'obsdata'.

    Raises
    ------
    Exception
        If separator_insertion is neither None, an int nor a str.
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes: lxml refuses str input that carries an XML encoding
        # declaration, and the bytes let the parser honor that declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    # parse xml file content
    root = et.fromstring(content)

    _check(root[2].tag == 'pobs')
    pobs = root[2]

    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    deltas = []
    names = []
    idl = []
    # Children 0-4 of pobs hold the description and 'nr'; the rest is per-replica data.
    for i in range(5, len(pobs)):
        delta, name, idx = _import_rdata(pobs[i])
        deltas.append(delta)
        if separator_insertion is None:
            pass
        elif isinstance(separator_insertion, int):
            name = name[:separator_insertion] + '|' + name[separator_insertion:]
        elif isinstance(separator_insertion, str):
            name = name.replace(separator_insertion, "|%s" % (separator_insertion))
        else:
            raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
        names.append(name)
        idl.append(idx)
    # One Obs per observable, each collecting its deltas from all replica.
    res = [Obs([d[i] for d in deltas], names, idl=idl) for i in range(len(deltas[0]))]

    descriptiond = {}
    for i in range(4):
        descriptiond[pobs[i].tag] = pobs[i].text.strip()

    _check(pobs[4].tag == "nr")

    _check(pobs[5].tag == 'array')
    if pobs[5][1].tag == 'symbol':
        symbol = pobs[5][1].text.strip()
        descriptiond['symbol'] = symbol

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['obsdata'] = res
    return retd
Import a list of Obs from an xml.gz file in the Zeuthen pobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str or int): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. None (default): Replica names remain unchanged.
Returns
- res (list[Obs]): Imported data
- or
- res (dict): Imported data and meta-data
def import_dobs_string(content, full_output=False, separator_insertion=True):
    """Import a list of Obs from a string in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    content : str or bytes
        XML string containing the data. It is handed to lxml's fromstring unchanged.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data. The keys are 'program', 'version', 'who',
        'date', 'host', 'description', 'enstags' and 'obsdata'.

    Raises
    ------
    Exception
        If separator_insertion is of an unsupported type.
    """

    root = et.fromstring(content)

    _check(root.tag == 'OBSERVABLES')
    _check(root[0].tag == 'SCHEMA')
    version = root[0][1].text.strip()

    _check(root[1].tag == 'origin')
    file_origin = _etree_to_dict(root[1])['origin']

    _check(root[2].tag == 'dobs')

    dobs = root[2]

    descriptiond = {}
    for i in range(3):
        descriptiond[dobs[i].tag] = dobs[i].text.strip()

    _check(dobs[3].tag == 'array')

    symbol = []
    if dobs[3][1].tag == 'symbol':
        symbol = dobs[3][1].text.strip()
        descriptiond['symbol'] = symbol
    mean = _import_array(dobs[3])[0]

    _check(dobs[4].tag == "ne")
    ne = int(dobs[4].text.strip())
    _check(dobs[5].tag == "nc")

    idld = {}
    deltad = {}
    covd = {}
    gradd = {}
    names = []
    e_names = []
    enstags = {}
    for k in range(6, len(list(dobs))):
        if dobs[k].tag == "edata":
            _check(dobs[k][0].tag == "enstag")
            ename = dobs[k][0].text.strip()
            e_names.append(ename)
            _check(dobs[k][1].tag == "nr")
            R = int(dobs[k][1].text.strip())
            for i in range(2, 2 + R):
                deltas, rname, idx = _import_rdata(dobs[k][i])
                # False has to be tested explicitly: bool is a subclass of int, so it
                # would otherwise reach the int branch and prepend "|" to the name.
                if separator_insertion is None or separator_insertion is False:
                    pass
                elif separator_insertion is True:
                    if rname.startswith(ename):
                        rname = rname[:len(ename)] + '|' + rname[len(ename):]
                elif isinstance(separator_insertion, int):
                    rname = rname[:separator_insertion] + '|' + rname[separator_insertion:]
                elif isinstance(separator_insertion, str):
                    rname = rname.replace(separator_insertion, "|%s" % (separator_insertion))
                else:
                    raise Exception("separator_insertion has to be string or int, is ", type(separator_insertion))
                if '|' in rname:
                    new_ename = rname[:rname.index('|')]
                else:
                    new_ename = ename
                # Map the ensemble name used by the Obs back to the enstag of the file.
                enstags[new_ename] = ename
                idld[rname] = idx
                deltad[rname] = deltas
                names.append(rname)
        elif dobs[k].tag == "cdata":
            cname, cov, grad = _import_cdata(dobs[k])
            covd[cname] = cov
            if grad.shape[1] == 1:
                gradd[cname] = [grad for i in range(len(mean))]
            else:
                gradd[cname] = grad.T
        else:
            _check(False)
    names = list(set(names))

    # Add the mean to every non-zero entry; exact zeros mark configurations on
    # which an observable is not defined and are filtered out below.
    for name in names:
        for i in range(len(deltad[name])):
            tmp = np.zeros_like(deltad[name][i])
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    tmp[j] = deltad[name][i][j] + mean[i]
            deltad[name][i] = tmp

    res = []
    for i in range(len(mean)):
        deltas = []
        idl = []
        obs_names = []
        for name in names:
            h = np.unique(deltad[name][i])
            if len(h) == 1 and np.all(h == mean[i]):
                continue
            repdeltas = []
            repidl = []
            for j in range(len(deltad[name][i])):
                if deltad[name][i][j] != 0.:
                    repdeltas.append(deltad[name][i][j])
                    repidl.append(idld[name][j])
            if len(repdeltas) > 0:
                obs_names.append(name)
                deltas.append(repdeltas)
                idl.append(repidl)

        res.append(Obs(deltas, obs_names, idl=idl))
        res[-1]._value = mean[i]
    _check(len(e_names) == ne)

    cnames = list(covd.keys())
    for i in range(len(res)):
        new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
        # Drop covobs this observable does not depend on (vanishing gradient).
        for name in cnames:
            if np.all(new_covobs[name].grad == 0):
                del new_covobs[name]
        cnames_loc = list(new_covobs.keys())
        for name in cnames_loc:
            res[i].names.append(name)
            res[i].shape[name] = 1
            res[i].idl[name] = []
        res[i]._covobs = new_covobs

    if symbol:
        # The symbols arrive as one whitespace-separated string (create_dobs_string
        # joins them with single spaces), so split before indexing; indexing the
        # string itself would hand out single characters. Tags are only assigned
        # when there is exactly one symbol per observable.
        tags = symbol.split()
        if len(tags) == len(res):
            for obs, tag in zip(res, tags):
                obs.tag = None if tag == 'None' else tag

    if not full_output:
        return res

    tool = file_origin.get('tool', None)
    if tool:
        program = tool['name'] + ' ' + tool['version']
    else:
        program = ''
    retd = {}
    retd['program'] = program
    retd['version'] = version
    retd['who'] = file_origin['who']
    retd['date'] = file_origin['date']
    retd['host'] = file_origin['host']
    retd['description'] = descriptiond
    retd['enstags'] = enstags
    retd['obsdata'] = res
    return retd
Import a list of Obs from a string in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- content (str): XML string containing the data
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
- res (list[Obs]): Imported data
- or
- res (dict): Imported data and meta-data
def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
    """Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

    Tags are not written or recovered automatically.

    Parameters
    ----------
    fname : str
        Filename of the input file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    full_output : bool
        If True, a dict containing auxiliary information and the data is returned.
        If False, only the data is returned as list.
    gz : bool
        If True, assumes that data is gzipped. If False, assumes XML file.
    separator_insertion : str, int or bool
        str: replace all occurrences of "separator_insertion" within the replica names
        by "|%s" % (separator_insertion) when constructing the names of the replica.
        int: Insert the separator "|" at the position given by separator_insertion.
        True (default): separator "|" is inserted after len(ensname), assuming that the
        ensemble name is a prefix to the replica name.
        None or False: No separator is inserted.

    Returns
    -------
    res : list[Obs]
        Imported data
    or
    res : dict
        Imported data and meta-data
    """

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'
    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'
        with gzip.open(fname, 'r') as fin:
            content = fin.read()
    else:
        if fname.endswith('.gz'):
            warnings.warn("Trying to read from %s without unzipping!" % fname, UserWarning)
        # Read raw bytes, as in the gzipped branch: lxml refuses str input that
        # carries an XML encoding declaration.
        with open(fname, 'rb') as fin:
            content = fin.read()

    return import_dobs_string(content, full_output, separator_insertion=separator_insertion)
Import a list of Obs from an xml.gz file in the Zeuthen dobs format.
Tags are not written or recovered automatically.
Parameters
- fname (str): Filename of the input file.
- full_output (bool): If True, a dict containing auxiliary information and the data is returned. If False, only the data is returned as list.
- gz (bool): If True, assumes that data is gzipped. If False, assumes XML file.
- separator_insertion (str, int or bool): str: replace all occurrences of "separator_insertion" within the replica names by "|%s" % (separator_insertion) when constructing the names of the replica. int: Insert the separator "|" at the position given by separator_insertion. True (default): separator "|" is inserted after len(ensname), assuming that the ensemble name is a prefix to the replica name. None or False: No separator is inserted.
Returns
- res (list[Obs]): Imported data
- or
- res (dict): Imported data and meta-data
def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None):
    """Generate the string for the export of a list of Obs or structures containing Obs
    to a .xml.gz file according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
        The entries are written as one space-separated string.
    who : str
        Provide the name of the person that exports the data.
        If None, the name of the current user is used.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used. The dict is not modified.

    Returns
    -------
    xml_str : str
        XML string generated from the data

    Raises
    ------
    Exception
        If symbol is not a list or has a length other than 0 or len(obsl),
        or if two Obs carry different covariance matrices under the same name.
    """
    # Work on a copy so that the default entries added below do not leak
    # into the dict owned by the caller.
    enstags = {} if enstags is None else dict(enstags)

    r_names = []
    for o in obsl:
        r_names += [rn for rn in o.names if rn.split('|')[0] in o.mc_names]
    r_names = sorted(set(r_names))
    mc_names = sorted(set([n.split('|')[0] for n in r_names]))
    for tmpname in mc_names:
        enstags.setdefault(tmpname, tmpname)
    ne = len(mc_names)

    cov_names = []
    for o in obsl:
        cov_names += list(o.cov_names)
    cov_names = sorted(set(cov_names))
    nc = len(cov_names)

    if who is None:
        who = getpass.getuser()

    od = {}
    od['OBSERVABLES'] = {}
    od['OBSERVABLES']['SCHEMA'] = {'NAME': 'lattobs', 'VERSION': '1.0'}
    od['OBSERVABLES']['origin'] = {
        'who': who,
        # Dropping the microseconds explicitly keeps the 'YYYY-MM-DD HH:MM:SS' form
        # even when they happen to be zero; slicing the string would cut the time.
        'date': str(datetime.datetime.now().replace(microsecond=0)),
        'host': socket.gethostname(),
        'tool': {'name': 'pyerrors', 'version': pyerrorsversion.__version__}}
    od['OBSERVABLES']['dobs'] = {}
    dobs_d = od['OBSERVABLES']['dobs']
    dobs_d['spec'] = spec
    dobs_d['origin'] = origin
    dobs_d['name'] = name
    dobs_d['array'] = {}
    dobs_d['array']['id'] = 'val'
    dobs_d['array']['layout'] = '1 f%d' % (len(obsl))
    osymbol = ''
    if symbol:
        if not isinstance(symbol, list):
            raise Exception('Symbol has to be a list!')
        if not (len(symbol) == 0 or len(symbol) == len(obsl)):
            raise Exception('Symbol has to be a list of lenght 0 or %d!' % (len(obsl)))
        # Format every entry, including the first one, so that non-string
        # symbols (e.g. None) are written as text instead of raising.
        osymbol = ' '.join('%s' % s for s in symbol)
    dobs_d['array']['symbol'] = osymbol

    dobs_d['array']['#values'] = [' '.join(['%1.16e' % o.value for o in obsl])]
    dobs_d['ne'] = '%d' % (ne)
    dobs_d['nc'] = '%d' % (nc)
    dobs_d['edata'] = []
    for ename in mc_names:
        ed = {}
        ed['enstag'] = enstags[ename]
        onames = sorted([n for n in r_names if (n.startswith(ename + '|') or n == ename)])
        nr = len(onames)
        ed['nr'] = nr
        ed[''] = []

        for repname in onames:
            ad = {}
            ad['id'] = repname.replace('|', '')
            idx = _merge_idx([o.idl.get(repname, []) for o in obsl])
            Nconf = len(idx)
            ad['layout'] = '%d i f%d' % (Nconf, len(obsl))
            # counters[oi] is the position of the next unwritten configuration of
            # observable oi on this replica; -1 marks an exhausted observable.
            counters = [0 for o in obsl]
            offsets = [o.r_values[repname] - o.value if repname in o.r_values else 0 for o in obsl]
            rows = []
            for ci in idx:
                row = ['%d ' % ci]
                for oi, o in enumerate(obsl):
                    # A literal 0 is written wherever the observable has no
                    # measurement on configuration ci.
                    num = 0
                    if repname in o.idl and counters[oi] >= 0 and o.idl[repname][counters[oi]] == ci:
                        num = o.deltas[repname][counters[oi]] + offsets[oi]
                        counters[oi] += 1
                        if counters[oi] >= len(o.idl[repname]):
                            counters[oi] = -1
                    row.append('0 ' if num == 0 else '%1.16e ' % (num))
                row.append('\n')
                rows.append(''.join(row))
            ad['#data'] = ''.join(rows)
            ed[''].append(ad)
        dobs_d['edata'].append(ed)

    allcov = {}
    for o in obsl:
        for cname in o.cov_names:
            if cname in allcov:
                if not np.array_equal(allcov[cname], o.covobs[cname].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (cname))
            else:
                allcov[cname] = o.covobs[cname].cov
    dobs_d['cdata'] = []
    for cname in cov_names:
        cd = {}
        cd['id'] = cname

        covd = {'id': 'cov'}
        if allcov[cname].shape == ():
            ncov = 1
            covd['layout'] = '1 1 f'
            covd['#data'] = '%1.14e' % (allcov[cname])
        else:
            shape = allcov[cname].shape
            assert (shape[0] == shape[1])
            ncov = shape[0]
            covd['layout'] = '%d %d f' % (ncov, ncov)
            cov_rows = []
            for i in range(ncov):
                for j in range(ncov):
                    val = allcov[cname][i][j]
                    cov_rows.append('0 ' if val == 0 else '%1.14e ' % (val))
                cov_rows.append('\n')
            covd['#data'] = ''.join(cov_rows)

        gradd = {'id': 'grad'}
        gradd['layout'] = '%d f%d' % (ncov, len(obsl))
        grad_entries = []
        for i in range(ncov):
            for o in obsl:
                val = o.covobs[cname].grad[i].item() if cname in o.covobs else 0
                grad_entries.append('%1.14e ' % (val) if val != 0 else '0 ')
        gradd['#data'] = ''.join(grad_entries)
        cd['array'] = [covd, gradd]
        dobs_d['cdata'].append(cd)

    rs = '<?xml version="1.0" encoding="utf-8"?>\n' + _dobsdict_to_xmlstring_spaces(od)

    return rs
Generate the string for the export of a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
Returns
- xml_str (str): XML string generated from the data
def write_dobs(obsl, fname, name, spec='dobs v1.0', origin='', symbol=[], who=None, enstags=None, gz=True):
    """Export a list of Obs or structures containing Obs to a .xml.gz file
    according to the Zeuthen dobs format.

    Tags are not written or recovered automatically. The separator | is removed from the replica names.

    Parameters
    ----------
    obsl : list
        List of Obs that will be exported.
        The Obs inside a structure do not have to be defined on the same set of configurations,
        but the storage requirement is increased, if this is not the case.
    fname : str
        Filename of the output file. '.xml' is appended if the name ends with
        neither '.xml' nor '.gz'; '.gz' is appended in addition if gz is True.
    name : str
        The name of the observable.
    spec : str
        Optional string that describes the contents of the file.
    origin : str
        Specify where the data has its origin.
    symbol : list
        A list of symbols that describe the observables to be written. May be empty.
    who : str
        Provide the name of the person that exports the data.
    enstags : dict
        Provide alternative enstag for ensembles in the form enstags = {ename: enstag}
        Otherwise, the ensemble name is used.
    gz : bool
        If True, the output is a gzipped XML. If False, the output is an XML file.

    Returns
    -------
    None
    """
    if enstags is None:
        enstags = {}

    dobsstring = create_dobs_string(obsl, name, spec, origin, symbol, who, enstags=enstags)

    if not fname.endswith('.xml') and not fname.endswith('.gz'):
        fname += '.xml'

    if gz:
        if not fname.endswith('.gz'):
            fname += '.gz'

        # The context manager closes the file even if the write fails.
        with gzip.open(fname, 'wb') as fp:
            fp.write(dobsstring.encode('utf-8'))
    else:
        with open(fname, 'w', encoding='utf-8') as fp:
            fp.write(dobsstring)
Export a list of Obs or structures containing Obs to a .xml.gz file according to the Zeuthen dobs format.
Tags are not written or recovered automatically. The separator | is removed from the replica names.
Parameters
- obsl (list): List of Obs that will be exported. The Obs inside a structure do not have to be defined on the same set of configurations, but the storage requirement is increased, if this is not the case.
- fname (str): Filename of the output file.
- name (str): The name of the observable.
- spec (str): Optional string that describes the contents of the file.
- origin (str): Specify where the data has its origin.
- symbol (list): A list of symbols that describe the observables to be written. May be empty.
- who (str): Provide the name of the person that exports the data.
- enstags (dict): Provide alternative enstag for ensembles in the form enstags = {ename: enstag} Otherwise, the ensemble name is used.
- gz (bool): If True, the output is a gzipped XML. If False, the output is an XML file.
Returns
- None