pyerrors.input.sfcf
1import os 2import fnmatch 3import re 4import numpy as np # Thinly-wrapped numpy 5from ..obs import Obs 6from .utils import sort_names, check_idl 7import itertools 8 9 10sep = "/" 11 12 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs): 14 """Read sfcf files from given folder structure. 15 16 Parameters 17 ---------- 18 path : str 19 Path to the sfcf files. 20 prefix : str 21 Prefix of the sfcf files. 22 name : str 23 Name of the correlation function to read. 24 quarks : str 25 Label of the quarks used in the sfcf input file. e.g. "quark quark" 26 for version 0.0 this does NOT need to be given with the typical " - " 27 that is present in the output file, 28 this is done automatically for this version 29 corr_type : str 30 Type of correlation function to read. Can be 31 - 'bi' for boundary-inner 32 - 'bb' for boundary-boundary 33 - 'bib' for boundary-inner-boundary 34 noffset : int 35 Offset of the source (only relevant when wavefunctions are used) 36 wf : int 37 ID of wave function 38 wf2 : int 39 ID of the second wavefunction 40 (only relevant for boundary-to-boundary correlation functions) 41 im : bool 42 if True, read imaginary instead of real part 43 of the correlation function. 44 names : list 45 Alternative labeling for replicas/ensembles. 46 Has to have the appropriate length 47 ens_name : str 48 replaces the name of the ensemble 49 version: str 50 version of SFCF, with which the measurement was done. 51 if the compact output option (-c) was specified, 52 append a "c" to the version (e.g. "1.0c") 53 if the append output option (-a) was specified, 54 append an "a" to the version 55 cfg_separator : str 56 String that separates the ensemble identifier from the configuration number (default 'n'). 57 replica: list 58 list of replica to be read, default is all 59 files: list 60 list of files to be read per replica, default is all. 61 for non-compact output format, hand the folders to be read here. 62 check_configs: list[list[int]] 63 list of list of supposed configs, eg. [range(1,1000)] 64 for one replicum with 1000 configs 65 66 Returns 67 ------- 68 result: list[Obs] 69 list of Observables with length T, observable per timeslice. 70 bb-type correlators have length 1. 71 """ 72 ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type], 73 noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version, 74 cfg_separator=cfg_separator, silent=silent, **kwargs) 75 return ret[name][quarks][str(noffset)][str(wf)][str(wf2)] 76 77 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs): 79 """Read sfcf files from given folder structure. 80 81 Parameters 82 ---------- 83 path : str 84 Path to the sfcf files. 85 prefix : str 86 Prefix of the sfcf files. 87 name : str 88 Name of the correlation function to read. 89 quarks_list : list[str] 90 Label of the quarks used in the sfcf input file. e.g. "quark quark" 91 for version 0.0 this does NOT need to be given with the typical " - " 92 that is present in the output file, 93 this is done automatically for this version 94 corr_type_list : list[str] 95 Type of correlation function to read. Can be 96 - 'bi' for boundary-inner 97 - 'bb' for boundary-boundary 98 - 'bib' for boundary-inner-boundary 99 noffset_list : list[int] 100 Offset of the source (only relevant when wavefunctions are used) 101 wf_list : int 102 ID of wave function 103 wf2_list : list[int] 104 ID of the second wavefunction 105 (only relevant for boundary-to-boundary correlation functions) 106 im : bool 107 if True, read imaginary instead of real part 108 of the correlation function. 109 names : list 110 Alternative labeling for replicas/ensembles. 111 Has to have the appropriate length 112 ens_name : str 113 replaces the name of the ensemble 114 version: str 115 version of SFCF, with which the measurement was done. 116 if the compact output option (-c) was specified, 117 append a "c" to the version (e.g. "1.0c") 118 if the append output option (-a) was specified, 119 append an "a" to the version 120 cfg_separator : str 121 String that separates the ensemble identifier from the configuration number (default 'n'). 122 replica: list 123 list of replica to be read, default is all 124 files: list[list[int]] 125 list of files to be read per replica, default is all. 126 for non-compact output format, hand the folders to be read here. 127 check_configs: list[list[int]] 128 list of list of supposed configs, eg. [range(1,1000)] 129 for one replicum with 1000 configs 130 rep_string: str 131 Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string. 132 Returns 133 ------- 134 result: dict[list[Obs]] 135 dict with one of the following properties: 136 if keyed_out: 137 dict[key] = list[Obs] 138 where key has the form name/quarks/offset/wf/wf2 139 if not keyed_out: 140 dict[name][quarks][offset][wf][wf2] = list[Obs] 141 """ 142 143 if kwargs.get('im'): 144 im = 1 145 part = 'imaginary' 146 else: 147 im = 0 148 part = 'real' 149 150 known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"] 151 152 if version not in known_versions: 153 raise Exception("This version is not known!") 154 if (version[-1] == "c"): 155 appended = False 156 compact = True 157 version = version[:-1] 158 elif (version[-1] == "a"): 159 appended = True 160 compact = False 161 version = version[:-1] 162 else: 163 compact = False 164 appended = False 165 ls = [] 166 if "replica" in kwargs: 167 ls = kwargs.get("replica") 168 else: 169 for (dirpath, dirnames, filenames) in os.walk(path): 170 if not appended: 171 ls.extend(dirnames) 172 else: 173 ls.extend(filenames) 174 break 175 if not ls: 176 raise Exception('Error, directory not found') 177 # Exclude folders with different names 178 for exc in ls: 179 if not fnmatch.fnmatch(exc, prefix + '*'): 180 ls = list(set(ls) - set([exc])) 181 182 if not appended: 183 ls = sort_names(ls) 184 replica = len(ls) 185 186 else: 187 replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls])) 188 if replica == 0: 189 raise Exception('No replica found in directory') 190 if not silent: 191 print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica') 192 193 if 'names' in kwargs: 194 new_names = kwargs.get('names') 195 if len(new_names) != len(set(new_names)): 196 raise Exception("names are not unique!") 197 if len(new_names) != replica: 198 raise Exception('names should have the length', replica) 199 200 else: 201 ens_name = kwargs.get("ens_name") 202 if not appended: 203 new_names = _get_rep_names(ls, ens_name, rep_sep=(kwargs.get('rep_string', 'r'))) 204 else: 205 new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name, rep_sep=(kwargs.get('rep_string', 'r'))) 206 new_names = sort_names(new_names) 207 208 idl = [] 209 210 noffset_list = [str(x) for x in noffset_list] 211 wf_list = [str(x) for x in wf_list] 212 wf2_list = [str(x) for x in wf2_list] 213 214 # setup dict structures 215 intern = {} 216 for name, corr_type in zip(name_list, corr_type_list): 217 intern[name] = {} 218 b2b, single = _extract_corr_type(corr_type) 219 intern[name]["b2b"] = b2b 220 intern[name]["single"] = single 221 intern[name]["spec"] = {} 222 for quarks in quarks_list: 223 intern[name]["spec"][quarks] = {} 224 for off in noffset_list: 225 intern[name]["spec"][quarks][off] = {} 226 for w in wf_list: 227 intern[name]["spec"][quarks][off][w] = {} 228 if b2b: 229 for w2 in wf2_list: 230 intern[name]["spec"][quarks][off][w][w2] = {} 231 intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks) 232 else: 233 intern[name]["spec"][quarks][off][w]["0"] = {} 234 intern[name]["spec"][quarks][off][w]["0"]["pattern"] = _make_pattern(version, name, off, w, 0, intern[name]['b2b'], quarks) 235 236 internal_ret_dict = {} 237 needed_keys = [] 238 for name, corr_type in zip(name_list, corr_type_list): 239 b2b, single = _extract_corr_type(corr_type) 240 if b2b: 241 needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, wf2_list)) 242 else: 243 needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, ["0"])) 244 245 for key in needed_keys: 246 internal_ret_dict[key] = [] 247 248 if not appended: 249 for i, item in enumerate(ls): 250 rep_path = path + '/' + item 251 if "files" in kwargs: 252 files = kwargs.get("files") 253 if isinstance(files, list): 254 if all(isinstance(f, list) for f in files): 255 files = files[i] 256 elif all(isinstance(f, str) for f in files): 257 files = files 258 else: 259 raise TypeError("files has to be of type list[list[str]] or list[str]!") 260 else: 261 raise TypeError("files has to be of type list[list[str]] or list[str]!") 262 263 else: 264 files = [] 265 sub_ls = _find_files(rep_path, prefix, compact, files) 266 rep_idl = [] 267 no_cfg = len(sub_ls) 268 for cfg in sub_ls: 269 try: 270 if compact: 271 rep_idl.append(int(cfg.split(cfg_separator)[-1])) 272 else: 273 rep_idl.append(int(cfg[3:])) 274 except Exception: 275 raise Exception("Couldn't parse idl from directory, problem with file " + cfg) 276 rep_idl.sort() 277 # maybe there is a better way to print the idls 278 if not silent: 279 print(item, ':', no_cfg, ' configurations') 280 idl.append(rep_idl) 281 # here we have found all the files we need to look into. 282 if i == 0: 283 if version != "0.0" and compact: 284 file = path + '/' + item + '/' + sub_ls[0] 285 for name_index, name in enumerate(name_list): 286 if version == "0.0" or not compact: 287 file = path + '/' + item + '/' + sub_ls[0] + '/' + name 288 if corr_type_list[name_index] == 'bi': 289 name_keys = _lists2key(quarks_list, noffset_list, wf_list, ["0"]) 290 else: 291 name_keys = _lists2key(quarks_list, noffset_list, wf_list, wf2_list) 292 for key in name_keys: 293 specs = _key2specs(key) 294 quarks = specs[0] 295 off = specs[1] 296 w = specs[2] 297 w2 = specs[3] 298 # here, we want to find the place within the file, 299 # where the correlator we need is stored. 300 # to do so, the pattern needed is put together 301 # from the input values 302 start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent) 303 intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read 304 intern[name]["T"] = T 305 # preparing the datastructure 306 # the correlators get parsed into... 307 deltas = [] 308 for j in range(intern[name]["T"]): 309 deltas.append([]) 310 internal_ret_dict[sep.join([name, key])] = deltas 311 312 if compact: 313 rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im) 314 for key in needed_keys: 315 name = _key2specs(key)[0] 316 for t in range(intern[name]["T"]): 317 internal_ret_dict[key][t].append(rep_deltas[key][t]) 318 else: 319 for key in needed_keys: 320 rep_data = [] 321 name = _key2specs(key)[0] 322 for subitem in sub_ls: 323 cfg_path = path + '/' + item + '/' + subitem 324 file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im) 325 rep_data.append(file_data) 326 for t in range(intern[name]["T"]): 327 internal_ret_dict[key][t].append([]) 328 for cfg in range(no_cfg): 329 internal_ret_dict[key][t][i].append(rep_data[cfg][key][t]) 330 else: 331 for key in needed_keys: 332 specs = _key2specs(key) 333 name = specs[0] 334 quarks = specs[1] 335 off = specs[2] 336 w = specs[3] 337 w2 = specs[4] 338 if "files" in kwargs: 339 if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")): 340 name_ls = kwargs.get("files") 341 else: 342 raise TypeError("In append mode, files has to be of type list[str]!") 343 else: 344 name_ls = ls 345 for exc in name_ls: 346 if not fnmatch.fnmatch(exc, prefix + '*.' + name): 347 name_ls = list(set(name_ls) - set([exc])) 348 name_ls = sort_names(name_ls) 349 pattern = intern[name]['spec'][quarks][off][w][w2]['pattern'] 350 deltas = [] 351 for rep, file in enumerate(name_ls): 352 rep_idl = [] 353 filename = path + '/' + file 354 T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single']) 355 if rep == 0: 356 intern[name]['T'] = T 357 for t in range(intern[name]['T']): 358 deltas.append([]) 359 for t in range(intern[name]['T']): 360 deltas[t].append(rep_data[t]) 361 internal_ret_dict[key] = deltas 362 if name == name_list[0]: 363 idl.append(rep_idl) 364 365 if kwargs.get("check_configs") is True: 366 if not silent: 367 print("Checking for missing configs...") 368 che = kwargs.get("check_configs") 369 if not (len(che) == len(idl)): 370 raise Exception("check_configs has to be the same length as replica!") 371 for r in range(len(idl)): 372 if not silent: 373 print("checking " + new_names[r]) 374 check_idl(idl[r], che[r]) 375 if not silent: 376 print("Done") 377 378 result_dict = {} 379 if keyed_out: 380 for key in needed_keys: 381 name = _key2specs(key)[0] 382 result = [] 383 for t in range(intern[name]["T"]): 384 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 385 result_dict[key] = result 386 else: 387 for name, corr_type in zip(name_list, corr_type_list): 388 result_dict[name] = {} 389 for quarks in quarks_list: 390 result_dict[name][quarks] = {} 391 for off in noffset_list: 392 result_dict[name][quarks][off] = {} 393 for w in wf_list: 394 result_dict[name][quarks][off][w] = {} 395 if corr_type != 'bi': 396 for w2 in wf2_list: 397 key = _specs2key(name, quarks, off, w, w2) 398 result = [] 399 for t in range(intern[name]["T"]): 400 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 401 result_dict[name][quarks][str(off)][str(w)][str(w2)] = result 402 else: 403 key = _specs2key(name, quarks, off, w, "0") 404 result = [] 405 for t in range(intern[name]["T"]): 406 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 407 result_dict[name][quarks][str(off)][str(w)][str(0)] = result 408 return result_dict 409 410 411def _lists2key(*lists): 412 keys = [] 413 for tup in itertools.product(*lists): 414 keys.append(sep.join(tup)) 415 return keys 416 417 418def _key2specs(key): 419 return key.split(sep) 420 421 422def _specs2key(*specs): 423 return sep.join(specs) 424 425 426def _read_o_file(cfg_path, name, needed_keys, intern, version, im): 427 return_vals = {} 428 for key in needed_keys: 429 file = cfg_path + '/' + name 430 specs = _key2specs(key) 431 if specs[0] == name: 432 with open(file) as fp: 433 lines = fp.readlines() 434 quarks = specs[1] 435 off = specs[2] 436 w = specs[3] 437 w2 = specs[4] 438 T = intern[name]["T"] 439 start_read = intern[name]["spec"][quarks][off][w][w2]["start"] 440 deltas = [] 441 for line in lines[start_read:start_read + T]: 442 floats = list(map(float, line.split())) 443 if version == "0.0": 444 deltas.append(floats[im - intern[name]["single"]]) 445 else: 446 deltas.append(floats[1 + im - intern[name]["single"]]) 447 return_vals[key] = deltas 448 return return_vals 449 450 451def _extract_corr_type(corr_type): 452 if corr_type == 'bb': 453 b2b = True 454 single = True 455 elif corr_type == 'bib': 456 b2b = True 457 single = False 458 else: 459 b2b = False 460 single = False 461 return b2b, single 462 463 464def _find_files(rep_path, prefix, compact, files=[]): 465 sub_ls = [] 466 if not files == []: 467 files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) 468 else: 469 for (dirpath, dirnames, filenames) in os.walk(rep_path): 470 if compact: 471 sub_ls.extend(filenames) 472 else: 473 sub_ls.extend(dirnames) 474 break 475 if compact: 476 for exc in sub_ls: 477 if not fnmatch.fnmatch(exc, prefix + '*'): 478 sub_ls = list(set(sub_ls) - set([exc])) 479 sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) 480 else: 481 for exc in sub_ls: 482 if not fnmatch.fnmatch(exc, 'cfg*'): 483 sub_ls = list(set(sub_ls) - set([exc])) 484 sub_ls.sort(key=lambda x: int(x[3:])) 485 files = sub_ls 486 if len(files) == 0: 487 raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.") 488 return files 489 490 491def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks): 492 if version == "0.0": 493 pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf) 494 if b2b: 495 pattern += ", wf_2 " + str(wf2) 496 qs = quarks.split(" ") 497 pattern += " : " + qs[0] + " - " + qs[1] 498 else: 499 pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) 500 if b2b: 501 pattern += '\nwf_2 ' + str(wf2) 502 return pattern 503 504 505def _find_correlator(file_name, version, pattern, b2b, silent=False): 506 T = 0 507 508 with open(file_name, "r") as my_file: 509 510 content = my_file.read() 511 match = re.search(pattern, content) 512 if match: 513 if version == "0.0": 514 start_read = content.count('\n', 0, match.start()) + 1 515 T = content.count('\n', start_read) 516 else: 517 start_read = content.count('\n', 0, match.start()) + 5 + b2b 518 end_match = re.search(r'\n\s*\n', content[match.start():]) 519 T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b 520 if not T > 0: 521 raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!") 522 if not silent: 523 print(T, 'entries, starting to read in line', start_read) 524 525 else: 526 raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.') 527 528 return start_read, T 529 530 531def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im): 532 return_vals = {} 533 with open(rep_path + cfg_file) as fp: 534 lines = fp.readlines() 535 for key in needed_keys: 536 keys = _key2specs(key) 537 name = keys[0] 538 quarks = keys[1] 539 off = keys[2] 540 w = keys[3] 541 w2 = keys[4] 542 543 T = intern[name]["T"] 544 start_read = intern[name]["spec"][quarks][off][w][w2]["start"] 545 # check, if the correlator is in fact 546 # printed completely 547 if (start_read + T + 1 > len(lines)): 548 raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?") 549 corr_lines = lines[start_read - 6: start_read + T] 550 t_vals = [] 551 552 if corr_lines[1 - intern[name]["b2b"]].strip() != 'name ' + name: 553 raise Exception('Wrong format in file', cfg_file) 554 555 for k in range(6, T + 6): 556 floats = list(map(float, corr_lines[k].split())) 557 t_vals.append(floats[-2:][im]) 558 return_vals[key] = t_vals 559 return return_vals 560 561 562def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im): 563 rep_path = path + '/' + rep + '/' 564 no_cfg = len(sub_ls) 565 566 return_vals = {} 567 for key in needed_keys: 568 name = _key2specs(key)[0] 569 deltas = [] 570 for t in range(intern[name]["T"]): 571 deltas.append(np.zeros(no_cfg)) 572 return_vals[key] = deltas 573 574 for cfg in range(no_cfg): 575 cfg_file = sub_ls[cfg] 576 cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im) 577 for key in needed_keys: 578 name = _key2specs(key)[0] 579 for t in range(intern[name]["T"]): 580 return_vals[key][t][cfg] = cfg_data[key][t] 581 return return_vals 582 583 584def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single): 585 try: 586 idl = int(chunk[gauge_line].split(cfg_sep)[-1]) 587 except Exception: 588 raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line) 589 590 found_pat = "" 591 data = [] 592 for li in chunk[corr_line + 1:corr_line + 6 + b2b]: 593 found_pat += li 594 if re.search(pattern, found_pat): 595 for t, line in enumerate(chunk[start_read:start_read + T]): 596 floats = list(map(float, line.split())) 597 data.append(floats[im + 1 - single]) 598 return idl, data 599 600 601def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single): 602 with open(filename, 'r') as fp: 603 content = fp.readlines() 604 data_starts = [] 605 for linenumber, line in enumerate(content): 606 if "[run]" in line: 607 data_starts.append(linenumber) 608 if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1: 609 raise Exception("Irregularities in file structure found, not all runs have the same output length") 610 chunk = content[:data_starts[1]] 611 for linenumber, line in enumerate(chunk): 612 if line.startswith("gauge_name"): 613 gauge_line = linenumber 614 elif line.startswith("[correlator]"): 615 corr_line = linenumber 616 found_pat = "" 617 for li in chunk[corr_line + 1: corr_line + 6 + b2b]: 618 found_pat += li 619 if re.search(pattern, found_pat): 620 start_read = corr_line + 7 + b2b 621 break 622 else: 623 raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename) 624 endline = corr_line + 6 + b2b 625 while not chunk[endline] == "\n": 626 endline += 1 627 T = endline - start_read 628 629 # all other chunks should follow the same structure 630 rep_idl = [] 631 rep_data = [] 632 633 for cnfg in range(len(data_starts)): 634 start = data_starts[cnfg] 635 stop = start + data_starts[1] 636 chunk = content[start:stop] 637 idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single) 638 rep_idl.append(idl) 639 rep_data.append(data) 640 641 data = [] 642 643 for t in range(T): 644 data.append([]) 645 for c in range(len(rep_data)): 646 data[t].append(rep_data[c][t]) 647 return T, rep_idl, data 648 649 650def _get_rep_names(ls, ens_name=None, rep_sep='r'): 651 new_names = [] 652 for entry in ls: 653 try: 654 idx = entry.index(rep_sep) 655 except Exception: 656 raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") 657 658 if ens_name: 659 new_names.append(ens_name + '|' + entry[idx:]) 660 else: 661 new_names.append(entry[:idx] + '|' + entry[idx:]) 662 return new_names 663 664 665def _get_appended_rep_names(ls, prefix, name, ens_name=None, rep_sep='r'): 666 new_names = [] 667 for exc in ls: 668 if not fnmatch.fnmatch(exc, prefix + '*.' + name): 669 ls = list(set(ls) - set([exc])) 670 ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) 671 for entry in ls: 672 myentry = entry[:-len(name) - 1] 673 try: 674 idx = myentry.index(rep_sep) 675 except Exception: 676 raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") 677 678 if ens_name: 679 new_names.append(ens_name + '|' + entry[idx:]) 680 else: 681 new_names.append(myentry[:idx] + '|' + myentry[idx:]) 682 return new_names
sep =
'/'
def
read_sfcf( path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):
14def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs): 15 """Read sfcf files from given folder structure. 16 17 Parameters 18 ---------- 19 path : str 20 Path to the sfcf files. 21 prefix : str 22 Prefix of the sfcf files. 23 name : str 24 Name of the correlation function to read. 25 quarks : str 26 Label of the quarks used in the sfcf input file. e.g. "quark quark" 27 for version 0.0 this does NOT need to be given with the typical " - " 28 that is present in the output file, 29 this is done automatically for this version 30 corr_type : str 31 Type of correlation function to read. Can be 32 - 'bi' for boundary-inner 33 - 'bb' for boundary-boundary 34 - 'bib' for boundary-inner-boundary 35 noffset : int 36 Offset of the source (only relevant when wavefunctions are used) 37 wf : int 38 ID of wave function 39 wf2 : int 40 ID of the second wavefunction 41 (only relevant for boundary-to-boundary correlation functions) 42 im : bool 43 if True, read imaginary instead of real part 44 of the correlation function. 45 names : list 46 Alternative labeling for replicas/ensembles. 47 Has to have the appropriate length 48 ens_name : str 49 replaces the name of the ensemble 50 version: str 51 version of SFCF, with which the measurement was done. 52 if the compact output option (-c) was specified, 53 append a "c" to the version (e.g. "1.0c") 54 if the append output option (-a) was specified, 55 append an "a" to the version 56 cfg_separator : str 57 String that separates the ensemble identifier from the configuration number (default 'n'). 58 replica: list 59 list of replica to be read, default is all 60 files: list 61 list of files to be read per replica, default is all. 62 for non-compact output format, hand the folders to be read here. 63 check_configs: list[list[int]] 64 list of list of supposed configs, eg. [range(1,1000)] 65 for one replicum with 1000 configs 66 67 Returns 68 ------- 69 result: list[Obs] 70 list of Observables with length T, observable per timeslice. 71 bb-type correlators have length 1. 72 """ 73 ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type], 74 noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version, 75 cfg_separator=cfg_separator, silent=silent, **kwargs) 76 return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
Read sfcf files from given folder structure.
Parameters
- path (str): Path to the sfcf files.
- prefix (str): Prefix of the sfcf files.
- name (str): Name of the correlation function to read.
- quarks (str): Label of the quarks used in the sfcf input file. e.g. "quark quark" for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file, this is done automatically for this version
- corr_type (str):
Type of correlation function to read. Can be
- 'bi' for boundary-inner
- 'bb' for boundary-boundary
- 'bib' for boundary-inner-boundary
- noffset (int): Offset of the source (only relevant when wavefunctions are used)
- wf (int): ID of wave function
- wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
- im (bool): if True, read imaginary instead of real part of the correlation function.
- names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
- ens_name (str): replaces the name of the ensemble
- version (str): version of SFCF, with which the measurement was done. if the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c") if the append output option (-a) was specified, append an "a" to the version
- cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
- replica (list): list of replica to be read, default is all
- files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
- check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
Returns
- result (list[Obs]): list of Observables with length T, observable per timeslice. bb-type correlators have length 1.
def
read_sfcf_multi( path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):
79def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs): 80 """Read sfcf files from given folder structure. 81 82 Parameters 83 ---------- 84 path : str 85 Path to the sfcf files. 86 prefix : str 87 Prefix of the sfcf files. 88 name : str 89 Name of the correlation function to read. 90 quarks_list : list[str] 91 Label of the quarks used in the sfcf input file. e.g. "quark quark" 92 for version 0.0 this does NOT need to be given with the typical " - " 93 that is present in the output file, 94 this is done automatically for this version 95 corr_type_list : list[str] 96 Type of correlation function to read. Can be 97 - 'bi' for boundary-inner 98 - 'bb' for boundary-boundary 99 - 'bib' for boundary-inner-boundary 100 noffset_list : list[int] 101 Offset of the source (only relevant when wavefunctions are used) 102 wf_list : int 103 ID of wave function 104 wf2_list : list[int] 105 ID of the second wavefunction 106 (only relevant for boundary-to-boundary correlation functions) 107 im : bool 108 if True, read imaginary instead of real part 109 of the correlation function. 110 names : list 111 Alternative labeling for replicas/ensembles. 112 Has to have the appropriate length 113 ens_name : str 114 replaces the name of the ensemble 115 version: str 116 version of SFCF, with which the measurement was done. 117 if the compact output option (-c) was specified, 118 append a "c" to the version (e.g. "1.0c") 119 if the append output option (-a) was specified, 120 append an "a" to the version 121 cfg_separator : str 122 String that separates the ensemble identifier from the configuration number (default 'n'). 123 replica: list 124 list of replica to be read, default is all 125 files: list[list[int]] 126 list of files to be read per replica, default is all. 127 for non-compact output format, hand the folders to be read here. 128 check_configs: list[list[int]] 129 list of list of supposed configs, eg. [range(1,1000)] 130 for one replicum with 1000 configs 131 rep_string: str 132 Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string. 133 Returns 134 ------- 135 result: dict[list[Obs]] 136 dict with one of the following properties: 137 if keyed_out: 138 dict[key] = list[Obs] 139 where key has the form name/quarks/offset/wf/wf2 140 if not keyed_out: 141 dict[name][quarks][offset][wf][wf2] = list[Obs] 142 """ 143 144 if kwargs.get('im'): 145 im = 1 146 part = 'imaginary' 147 else: 148 im = 0 149 part = 'real' 150 151 known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"] 152 153 if version not in known_versions: 154 raise Exception("This version is not known!") 155 if (version[-1] == "c"): 156 appended = False 157 compact = True 158 version = version[:-1] 159 elif (version[-1] == "a"): 160 appended = True 161 compact = False 162 version = version[:-1] 163 else: 164 compact = False 165 appended = False 166 ls = [] 167 if "replica" in kwargs: 168 ls = kwargs.get("replica") 169 else: 170 for (dirpath, dirnames, filenames) in os.walk(path): 171 if not appended: 172 ls.extend(dirnames) 173 else: 174 ls.extend(filenames) 175 break 176 if not ls: 177 raise Exception('Error, directory not found') 178 # Exclude folders with different names 179 for exc in ls: 180 if not fnmatch.fnmatch(exc, prefix + '*'): 181 ls = list(set(ls) - set([exc])) 182 183 if not appended: 184 ls = sort_names(ls) 185 replica = len(ls) 186 187 else: 188 replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls])) 189 if replica == 0: 190 raise Exception('No replica found in directory') 191 if not silent: 192 print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica') 193 194 if 'names' in kwargs: 195 new_names = kwargs.get('names') 196 if len(new_names) != len(set(new_names)): 197 raise Exception("names are not unique!") 198 if len(new_names) != replica: 199 raise Exception('names should have the length', replica) 200 201 else: 202 ens_name = kwargs.get("ens_name") 203 if not appended: 204 new_names = _get_rep_names(ls, ens_name, rep_sep=(kwargs.get('rep_string', 'r'))) 205 else: 206 new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name, rep_sep=(kwargs.get('rep_string', 'r'))) 207 new_names = sort_names(new_names) 208 209 idl = [] 210 211 noffset_list = [str(x) for x in noffset_list] 212 wf_list = [str(x) for x in wf_list] 213 wf2_list = [str(x) for x in wf2_list] 214 215 # setup dict structures 216 intern = {} 217 for name, corr_type in zip(name_list, corr_type_list): 218 intern[name] = {} 219 b2b, single = _extract_corr_type(corr_type) 220 intern[name]["b2b"] = b2b 221 intern[name]["single"] = single 222 intern[name]["spec"] = {} 223 for quarks in quarks_list: 224 intern[name]["spec"][quarks] = {} 225 for off in noffset_list: 226 intern[name]["spec"][quarks][off] = {} 227 for w in wf_list: 228 intern[name]["spec"][quarks][off][w] = {} 229 if b2b: 230 for w2 in wf2_list: 231 intern[name]["spec"][quarks][off][w][w2] = {} 232 intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks) 233 else: 234 intern[name]["spec"][quarks][off][w]["0"] = {} 235 intern[name]["spec"][quarks][off][w]["0"]["pattern"] = _make_pattern(version, name, off, w, 0, intern[name]['b2b'], quarks) 236 237 internal_ret_dict = {} 238 needed_keys = [] 239 for name, corr_type in zip(name_list, corr_type_list): 240 b2b, single = _extract_corr_type(corr_type) 241 if b2b: 242 needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, wf2_list)) 243 else: 244 needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, ["0"])) 245 246 for key in needed_keys: 247 internal_ret_dict[key] = [] 248 249 if not appended: 250 for i, item in enumerate(ls): 251 rep_path = path + '/' + item 252 if "files" in kwargs: 253 files = kwargs.get("files") 254 if isinstance(files, list): 255 if all(isinstance(f, list) for f in files): 256 files = files[i] 257 elif all(isinstance(f, str) for f in files): 258 files = files 259 else: 260 raise TypeError("files has to be of type list[list[str]] or list[str]!") 261 else: 262 raise TypeError("files has to be of type list[list[str]] or list[str]!") 263 264 else: 265 files = [] 266 sub_ls = _find_files(rep_path, prefix, compact, files) 267 rep_idl = [] 268 no_cfg = len(sub_ls) 269 for cfg in sub_ls: 270 try: 271 if compact: 272 rep_idl.append(int(cfg.split(cfg_separator)[-1])) 273 else: 274 rep_idl.append(int(cfg[3:])) 275 except Exception: 276 raise Exception("Couldn't parse idl from directory, problem with file " + cfg) 277 rep_idl.sort() 278 # maybe there is a better way to print the idls 279 if not silent: 280 print(item, ':', no_cfg, ' configurations') 281 idl.append(rep_idl) 282 # here we have found all the files we need to look into. 283 if i == 0: 284 if version != "0.0" and compact: 285 file = path + '/' + item + '/' + sub_ls[0] 286 for name_index, name in enumerate(name_list): 287 if version == "0.0" or not compact: 288 file = path + '/' + item + '/' + sub_ls[0] + '/' + name 289 if corr_type_list[name_index] == 'bi': 290 name_keys = _lists2key(quarks_list, noffset_list, wf_list, ["0"]) 291 else: 292 name_keys = _lists2key(quarks_list, noffset_list, wf_list, wf2_list) 293 for key in name_keys: 294 specs = _key2specs(key) 295 quarks = specs[0] 296 off = specs[1] 297 w = specs[2] 298 w2 = specs[3] 299 # here, we want to find the place within the file, 300 # where the correlator we need is stored. 301 # to do so, the pattern needed is put together 302 # from the input values 303 start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent) 304 intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read 305 intern[name]["T"] = T 306 # preparing the datastructure 307 # the correlators get parsed into... 308 deltas = [] 309 for j in range(intern[name]["T"]): 310 deltas.append([]) 311 internal_ret_dict[sep.join([name, key])] = deltas 312 313 if compact: 314 rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im) 315 for key in needed_keys: 316 name = _key2specs(key)[0] 317 for t in range(intern[name]["T"]): 318 internal_ret_dict[key][t].append(rep_deltas[key][t]) 319 else: 320 for key in needed_keys: 321 rep_data = [] 322 name = _key2specs(key)[0] 323 for subitem in sub_ls: 324 cfg_path = path + '/' + item + '/' + subitem 325 file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im) 326 rep_data.append(file_data) 327 for t in range(intern[name]["T"]): 328 internal_ret_dict[key][t].append([]) 329 for cfg in range(no_cfg): 330 internal_ret_dict[key][t][i].append(rep_data[cfg][key][t]) 331 else: 332 for key in needed_keys: 333 specs = _key2specs(key) 334 name = specs[0] 335 quarks = specs[1] 336 off = specs[2] 337 w = specs[3] 338 w2 = specs[4] 339 if "files" in kwargs: 340 if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")): 341 name_ls = kwargs.get("files") 342 else: 343 raise TypeError("In append mode, files has to be of type list[str]!") 344 else: 345 name_ls = ls 346 for exc in name_ls: 347 if not fnmatch.fnmatch(exc, prefix + '*.' + name): 348 name_ls = list(set(name_ls) - set([exc])) 349 name_ls = sort_names(name_ls) 350 pattern = intern[name]['spec'][quarks][off][w][w2]['pattern'] 351 deltas = [] 352 for rep, file in enumerate(name_ls): 353 rep_idl = [] 354 filename = path + '/' + file 355 T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single']) 356 if rep == 0: 357 intern[name]['T'] = T 358 for t in range(intern[name]['T']): 359 deltas.append([]) 360 for t in range(intern[name]['T']): 361 deltas[t].append(rep_data[t]) 362 internal_ret_dict[key] = deltas 363 if name == name_list[0]: 364 idl.append(rep_idl) 365 366 if kwargs.get("check_configs") is True: 367 if not silent: 368 print("Checking for missing configs...") 369 che = kwargs.get("check_configs") 370 if not (len(che) == len(idl)): 371 raise Exception("check_configs has to be the same length as replica!") 372 for r in range(len(idl)): 373 if not silent: 374 print("checking " + new_names[r]) 375 check_idl(idl[r], che[r]) 376 if not silent: 377 print("Done") 378 379 result_dict = {} 380 if keyed_out: 381 for key in needed_keys: 382 name = _key2specs(key)[0] 383 result = [] 384 for t in range(intern[name]["T"]): 385 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 386 result_dict[key] = result 387 else: 388 for name, corr_type in zip(name_list, corr_type_list): 389 result_dict[name] = {} 390 for quarks in quarks_list: 391 result_dict[name][quarks] = {} 392 for off in noffset_list: 393 result_dict[name][quarks][off] = {} 394 for w in wf_list: 395 result_dict[name][quarks][off][w] = {} 396 if corr_type != 'bi': 397 for w2 in wf2_list: 398 key = _specs2key(name, quarks, off, w, w2) 399 result = [] 400 for t in range(intern[name]["T"]): 401 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 402 result_dict[name][quarks][str(off)][str(w)][str(w2)] = result 403 else: 404 key = _specs2key(name, quarks, off, w, "0") 405 result = [] 406 for t in range(intern[name]["T"]): 407 result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl)) 408 result_dict[name][quarks][str(off)][str(w)][str(0)] = result 409 return result_dict
Read sfcf files from given folder structure.
Parameters
- path (str): Path to the sfcf files.
- prefix (str): Prefix of the sfcf files.
- name (str): Name of the correlation function to read.
- quarks_list (list[str]): Label of the quarks used in the sfcf input file. e.g. "quark quark" for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file, this is done automatically for this version
- corr_type_list (list[str]):
Type of correlation function to read. Can be
- 'bi' for boundary-inner
- 'bb' for boundary-boundary
- 'bib' for boundary-inner-boundary
- noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
- wf_list (int): ID of wave function
- wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
- im (bool): if True, read imaginary instead of real part of the correlation function.
- names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
- ens_name (str): replaces the name of the ensemble
- version (str): version of SFCF, with which the measurement was done. if the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c") if the append output option (-a) was specified, append an "a" to the version
- cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
- replica (list): list of replica to be read, default is all
- files (list[list[int]]): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
- check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
- rep_string (str): Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string.
Returns
- result (dict[list[Obs]]): dict with one of the following properties: if keyed_out: dict[key] = list[Obs] where key has the form name/quarks/offset/wf/wf2 if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]