import fnmatch
import itertools
import os
import re

import numpy as np  # Thinly-wrapped numpy

from ..obs import Obs
from .utils import sort_names, check_idl


# Separator used to flatten (name, quarks, offset, wf, wf2) into one dict key.
sep = "/"


def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
    """Read sfcf files from given folder structure.

    Thin convenience wrapper around `read_sfcf_multi` for a single correlator.

    Parameters
    ----------
    path : str
        Path to the sfcf files.
    prefix : str
        Prefix of the sfcf files.
    name : str
        Name of the correlation function to read.
    quarks : str
        Label of the quarks used in the sfcf input file. e.g. "quark quark"
        for version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file,
        this is done automatically for this version
    corr_type : str
        Type of correlation function to read. Can be
        - 'bi' for boundary-inner
        - 'bb' for boundary-boundary
        - 'bib' for boundary-inner-boundary
    noffset : int
        Offset of the source (only relevant when wavefunctions are used)
    wf : int
        ID of wave function
    wf2 : int
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions)
    im : bool
        if True, read imaginary instead of real part
        of the correlation function.
    names : list
        Alternative labeling for replicas/ensembles.
        Has to have the appropriate length
    ens_name : str
        replaces the name of the ensemble
    version: str
        version of SFCF, with which the measurement was done.
        if the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c")
        if the append output option (-a) was specified,
        append an "a" to the version
    cfg_separator : str
        String that separates the ensemble identifier from the configuration number (default 'n').
    silent : bool
        If True, suppress the progress output printed while reading.
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all.
        for non-compact output format, hand the folders to be read here.
    check_configs: list[list[int]]
        list of list of supposed configs, eg. [range(1,1000)]
        for one replicum with 1000 configs

    Returns
    -------
    result: list[Obs]
        list of Observables with length T, observable per timeslice.
        bb-type correlators have length 1.
    """
    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
                          cfg_separator=cfg_separator, silent=silent, **kwargs)
    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]


def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
    """Read sfcf files from given folder structure.

    Parameters
    ----------
    path : str
        Path to the sfcf files.
    prefix : str
        Prefix of the sfcf files.
    name_list : list[str]
        Names of the correlation functions to read.
    quarks_list : list[str]
        Label of the quarks used in the sfcf input file. e.g. "quark quark"
        for version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file,
        this is done automatically for this version
    corr_type_list : list[str]
        Type of correlation function to read, one entry per entry of
        name_list (the two lists are zipped). Can be
        - 'bi' for boundary-inner
        - 'bb' for boundary-boundary
        - 'bib' for boundary-inner-boundary
    noffset_list : list[int]
        Offset of the source (only relevant when wavefunctions are used)
    wf_list : list[int]
        ID of wave function
    wf2_list : list[int]
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions)
    im : bool
        if True, read imaginary instead of real part
        of the correlation function.
    names : list
        Alternative labeling for replicas/ensembles.
        Has to have the appropriate length
    ens_name : str
        replaces the name of the ensemble
    version: str
        version of SFCF, with which the measurement was done.
        if the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c")
        if the append output option (-a) was specified,
        append an "a" to the version
    cfg_separator : str
        String that separates the ensemble identifier from the configuration number (default 'n').
    silent : bool
        If True, suppress the progress output printed while reading.
    keyed_out : bool
        Selects the layout of the returned dict, see Returns.
    replica: list
        list of replica to be read, default is all
    files: list[list[str]] or list[str]
        list of files to be read per replica, default is all.
        for non-compact output format, hand the folders to be read here.
    check_configs: list[list[int]]
        list of list of supposed configs, eg. [range(1,1000)]
        for one replicum with 1000 configs

    Returns
    -------
    result: dict[list[Obs]]
        dict with one of the following properties:
        if keyed_out:
            dict[key] = list[Obs]
            where key has the form name/quarks/offset/wf/wf2
        if not keyed_out:
            dict[name][quarks][offset][wf][wf2] = list[Obs]

    Raises
    ------
    Exception
        If the version is unknown, no replica are found, the given names are
        not unique or of the wrong length, a configuration number cannot be
        parsed, or check_configs does not match the number of replica.
    TypeError
        If the `files` keyword has an unsupported type.
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'

    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]

    if version not in known_versions:
        raise Exception("This version is not known!")
    # A trailing "c"/"a" selects the compact/appended layout and is stripped
    # so that `version` only carries the numeric part from here on.
    if version[-1] == "c":
        appended = False
        compact = True
        version = version[:-1]
    elif version[-1] == "a":
        appended = True
        compact = False
        version = version[:-1]
    else:
        compact = False
        appended = False

    ls = []
    if "replica" in kwargs:
        ls = kwargs.get("replica")
    else:
        # Only the top level of `path` is inspected.
        for (dirpath, dirnames, filenames) in os.walk(path):
            if not appended:
                ls.extend(dirnames)
            else:
                ls.extend(filenames)
            break
        if not ls:
            raise Exception('Error, directory not found')
        # Exclude folders with different names
        ls = [entry for entry in ls if fnmatch.fnmatch(entry, prefix + '*')]

    if not ls:
        # Guard before the division below, which would otherwise fail with a
        # ZeroDivisionError in append mode.
        raise Exception('No replica found in directory')

    if not appended:
        ls = sort_names(ls)
        replica = len(ls)
    else:
        # In append mode there is one file per (replicum, correlator name);
        # the file extension is the correlator name.
        extensions = [file.split(".")[-1] for file in ls]
        replica = len(extensions) // len(set(extensions))
    if replica == 0:
        raise Exception('No replica found in directory')
    if not silent:
        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')

    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != len(set(new_names)):
            raise Exception("names are not unique!")
        if len(new_names) != replica:
            raise Exception('names should have the length ' + str(replica))
    else:
        ens_name = kwargs.get("ens_name")
        if not appended:
            new_names = _get_rep_names(ls, ens_name)
        else:
            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name)
        new_names = sort_names(new_names)

    idl = []

    # All spec values are handled as strings because they become dict keys.
    noffset_list = [str(x) for x in noffset_list]
    wf_list = [str(x) for x in wf_list]
    wf2_list = [str(x) for x in wf2_list]

    # setup dict structures
    intern = {}
    for name, corr_type in zip(name_list, corr_type_list):
        intern[name] = {}
        b2b, single = _extract_corr_type(corr_type)
        intern[name]["b2b"] = b2b
        intern[name]["single"] = single
        intern[name]["spec"] = {}
        for quarks in quarks_list:
            intern[name]["spec"][quarks] = {}
            for off in noffset_list:
                intern[name]["spec"][quarks][off] = {}
                for w in wf_list:
                    intern[name]["spec"][quarks][off][w] = {}
                    for w2 in wf2_list:
                        intern[name]["spec"][quarks][off][w][w2] = {
                            "pattern": _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks),
                        }

    internal_ret_dict = {}
    needed_keys = _lists2key(name_list, quarks_list, noffset_list, wf_list, wf2_list)
    for key in needed_keys:
        internal_ret_dict[key] = []

    if not appended:
        type_error_msg = "files has to be of type list[list[str]] or list[str]!"
        for i, item in enumerate(ls):
            rep_path = path + '/' + item
            if "files" in kwargs:
                files = kwargs.get("files")
                if not isinstance(files, list):
                    raise TypeError(type_error_msg)
                if files and all(isinstance(f, list) for f in files):
                    # one explicit file list per replicum
                    files = files[i]
                elif not all(isinstance(f, str) for f in files):
                    raise TypeError(type_error_msg)
            else:
                files = []
            sub_ls = _find_files(rep_path, prefix, compact, files)
            rep_idl = []
            no_cfg = len(sub_ls)
            for cfg in sub_ls:
                try:
                    if compact:
                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
                    else:
                        # non-compact configs live in folders named cfg<number>
                        rep_idl.append(int(cfg[3:]))
                except Exception as err:
                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg) from err
            rep_idl.sort()
            # maybe there is a better way to print the idls
            if not silent:
                print(item, ':', no_cfg, ' configurations')
            idl.append(rep_idl)
            # here we have found all the files we need to look into.
            if i == 0:
                # The layout is determined once from the first config of the
                # first replicum and reused for all other files.
                if version != "0.0" and compact:
                    file = path + '/' + item + '/' + sub_ls[0]
                for name in name_list:
                    if version == "0.0" or not compact:
                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
                    for key in _lists2key(quarks_list, noffset_list, wf_list, wf2_list):
                        quarks, off, w, w2 = _key2specs(key)
                        # here, we want to find the place within the file,
                        # where the correlator we need is stored.
                        # to do so, the pattern needed is put together
                        # from the input values
                        spec = intern[name]["spec"][quarks][off][w][w2]
                        start_read, T = _find_correlator(file, version, spec["pattern"], intern[name]['b2b'], silent=silent)
                        spec["start"] = start_read
                        intern[name]["T"] = T
                        # preparing the datastructure
                        # the correlators get parsed into...
                        internal_ret_dict[sep.join([name, key])] = [[] for _ in range(T)]

            if compact:
                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
                for key in needed_keys:
                    name = _key2specs(key)[0]
                    for t in range(intern[name]["T"]):
                        internal_ret_dict[key][t].append(rep_deltas[key][t])
            else:
                # `_read_o_file` returns every key belonging to `name`, so
                # each config file is read once per name, not once per key.
                data_by_name = {}
                for key in needed_keys:
                    name = _key2specs(key)[0]
                    if name not in data_by_name:
                        data_by_name[name] = [
                            _read_o_file(path + '/' + item + '/' + subitem, name, needed_keys, intern, version, im)
                            for subitem in sub_ls
                        ]
                    rep_data = data_by_name[name]
                    for t in range(intern[name]["T"]):
                        internal_ret_dict[key][t].append([])
                        for cfg in range(no_cfg):
                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
    else:
        for key in needed_keys:
            name, quarks, off, w, w2 = _key2specs(key)
            if "files" in kwargs:
                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
                    name_ls = kwargs.get("files")
                else:
                    raise TypeError("In append mode, files has to be of type list[str]!")
            else:
                name_ls = ls
            # keep only the files that belong to this correlator name
            name_ls = [entry for entry in name_ls if fnmatch.fnmatch(entry, prefix + '*.' + name)]
            name_ls = sort_names(name_ls)
            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
            deltas = []
            for rep, file in enumerate(name_ls):
                filename = path + '/' + file
                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
                if rep == 0:
                    intern[name]['T'] = T
                    for t in range(intern[name]['T']):
                        deltas.append([])
                for t in range(intern[name]['T']):
                    deltas[t].append(rep_data[t])
                # The configuration lists are collected exactly once (for the
                # first key); collecting them for every key of the first name
                # would make idl longer than the number of replica.
                if key == needed_keys[0]:
                    idl.append(rep_idl)
            internal_ret_dict[key] = deltas

    che = kwargs.get("check_configs")
    if che is not None:
        if not silent:
            print("Checking for missing configs...")
        if not (len(che) == len(idl)):
            raise Exception("check_configs has to be the same length as replica!")
        for r in range(len(idl)):
            if not silent:
                print("checking " + new_names[r])
            check_idl(idl[r], che[r])
        if not silent:
            print("Done")

    result_dict = {}
    if keyed_out:
        for key in needed_keys:
            # T is stored per correlator name, so look it up for this key.
            name = _key2specs(key)[0]
            result_dict[key] = [
                Obs(internal_ret_dict[key][t], new_names, idl=idl)
                for t in range(intern[name]["T"])
            ]
    else:
        for name in name_list:
            result_dict[name] = {}
            for quarks in quarks_list:
                result_dict[name][quarks] = {}
                for off in noffset_list:
                    result_dict[name][quarks][off] = {}
                    for w in wf_list:
                        result_dict[name][quarks][off][w] = {}
                        for w2 in wf2_list:
                            key = _specs2key(name, quarks, off, w, w2)
                            result_dict[name][quarks][off][w][w2] = [
                                Obs(internal_ret_dict[key][t], new_names, idl=idl)
                                for t in range(intern[name]["T"])
                            ]
    return result_dict


def _lists2key(*lists):
    """Return all keys built from the cartesian product of the given lists of strings."""
    return [sep.join(tup) for tup in itertools.product(*lists)]


def _key2specs(key):
    """Split a key into its list of specs (inverse of `_specs2key`)."""
    return key.split(sep)


def _specs2key(*specs):
    """Join the given spec strings into a single key."""
    return sep.join(specs)


def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
    """Read all needed correlators called `name` from one non-compact config folder.

    Parameters
    ----------
    cfg_path : str
        Path of the configuration folder; the file read is cfg_path/name.
    name : str
        Name of the correlator; only keys starting with this name are read.
    needed_keys : list[str]
        Keys of the form name/quarks/offset/wf/wf2.
    intern : dict
        Internal bookkeeping dict providing "T", "single" and the "start"
        line of every spec.
    version : str
        Version of SFCF without the layout suffix.
    im : int
        0 to read the real part, 1 to read the imaginary part.

    Returns
    -------
    return_vals : dict[str, list[float]]
        For every matching key the T values read from the file.
    """
    return_vals = {}
    file = cfg_path + '/' + name
    lines = None
    for key in needed_keys:
        specs = _key2specs(key)
        if specs[0] != name:
            continue
        if lines is None:
            # Read the file lazily and only once, even for several keys.
            with open(file) as fp:
                lines = fp.readlines()
        quarks, off, w, w2 = specs[1:5]
        T = intern[name]["T"]
        start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
        # Version 0.0 has one column less in front of the data columns.
        column = im - intern[name]["single"]
        if version != "0.0":
            column += 1
        deltas = []
        for line in lines[start_read:start_read + T]:
            floats = list(map(float, line.split()))
            deltas.append(floats[column])
        return_vals[key] = deltas
    return return_vals


def _extract_corr_type(corr_type):
    """Translate a correlator type into the flags (b2b, single).

    'bb' gives (True, True), 'bib' gives (True, False) and everything else
    gives (False, False).
    """
    if corr_type == 'bb':
        return True, True
    if corr_type == 'bib':
        return True, False
    return False, False


def _find_files(rep_path, prefix, compact, files=None):
    """Return the sorted config files (compact) or config folders of one replicum.

    Parameters
    ----------
    rep_path : str
        Path of the replicum folder.
    prefix : str
        Prefix the compact files have to start with.
    compact : bool
        If True, look for files starting with prefix, otherwise for folders
        starting with 'cfg'.
    files : list[str], optional
        Explicit list of files; if given and non-empty it is only sorted by
        the last number in each entry. The list passed in is not modified.

    Returns
    -------
    found : list[str]
        Sorted list of file or folder names.

    Raises
    ------
    FileNotFoundError
        If no matching entry is found.
    """
    def _last_number(entry):
        return int(re.findall(r'\d+', entry)[-1])

    if files:
        found = sorted(files, key=_last_number)
    else:
        sub_ls = []
        # Only the top level of the replicum folder is inspected.
        for (dirpath, dirnames, filenames) in os.walk(rep_path):
            sub_ls.extend(filenames if compact else dirnames)
            break
        if compact:
            found = [entry for entry in sub_ls if fnmatch.fnmatch(entry, prefix + '*')]
            found.sort(key=_last_number)
        else:
            found = [entry for entry in sub_ls if fnmatch.fnmatch(entry, 'cfg*')]
            found.sort(key=lambda x: int(x[3:]))

    if not found:
        # One message string: several positional arguments would be
        # interpreted by OSError as errno, strerror, filename, ...
        raise FileNotFoundError("Did not find files in " + str(rep_path) + " with prefix " + str(prefix) + " and the given structure.")
    return found


def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
    """Build the regular expression that identifies a correlator header.

    For version "0.0" the header is a single comment line, for all other
    versions it is a block of 'name', 'quarks', 'offset', 'wf' (and 'wf_2'
    for boundary-to-boundary correlators) lines.
    """
    if version == "0.0":
        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
        if b2b:
            pattern += ", wf_2 " + str(wf2)
        qs = quarks.split(" ")
        if len(qs) >= 2:
            pattern += " : " + qs[0] + " - " + qs[1]
        else:
            # A single token (e.g. the default wildcard '.*') is used as is
            # instead of failing with an IndexError.
            pattern += " : " + quarks
    else:
        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
        if b2b:
            pattern += '\nwf_2      ' + str(wf2)
    return pattern


def _find_correlator(file_name, version, pattern, b2b, silent=False):
    """Locate a correlator inside a file.

    Parameters
    ----------
    file_name : str
        File to search.
    version : str
        Version of SFCF without the layout suffix.
    pattern : str
        Regular expression identifying the correlator header.
    b2b : bool
        True for boundary-to-boundary correlators, which have one more
        header line.
    silent : bool
        If True, do not print the number of entries found.

    Returns
    -------
    start_read : int
        Index of the first data line.
    T : int
        Number of data lines.

    Raises
    ------
    ValueError
        If the pattern is not found or the correlator is empty.
    """
    with open(file_name, "r") as my_file:
        content = my_file.read()

    match = re.search(pattern, content)
    if not match:
        raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')

    if version == "0.0":
        start_read = content.count('\n', 0, match.start()) + 1
        # NOTE(review): start_read is a line number but is used here as a
        # character offset - behaviour kept as is, confirm the intent.
        T = content.count('\n', start_read)
    else:
        start_read = content.count('\n', 0, match.start()) + 5 + b2b
        tail = content[match.start():]
        # The data block is terminated by an empty line.
        end_match = re.search(r'\n\s*\n', tail)
        if end_match is None:
            raise ValueError("Correlator with pattern\n" + pattern + "\nis not terminated by an empty line!")
        T = tail.count('\n', 0, end_match.start()) - 4 - b2b
    if not T > 0:
        raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
    if not silent:
        print(T, 'entries, starting to read in line', start_read)
    return start_read, T


def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
    """Read all needed correlators from one compact config file.

    Parameters
    ----------
    rep_path : str
        Path of the replicum folder, including the trailing separator.
    cfg_file : str
        Name of the config file.
    intern : dict
        Internal bookkeeping dict providing "T", "b2b" and the "start" line
        of every spec.
    needed_keys : list[str]
        Keys of the form name/quarks/offset/wf/wf2.
    im : int
        0 to read the real part, 1 to read the imaginary part.

    Returns
    -------
    return_vals : dict[str, list[float]]
        For every key the T values read from the file.

    Raises
    ------
    Exception
        If the file ends before the correlator does or the header does not
        contain the expected name.
    """
    return_vals = {}
    with open(rep_path + cfg_file) as fp:
        lines = fp.readlines()
    for key in needed_keys:
        name, quarks, off, w, w2 = _key2specs(key)[:5]
        T = intern[name]["T"]
        start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
        # check, if the correlator is in fact
        # printed completely
        if start_read + T + 1 > len(lines):
            raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
        # six lines in front of the data cover the header of the correlator
        corr_lines = lines[start_read - 6: start_read + T]
        if corr_lines[1 - intern[name]["b2b"]].strip() != 'name      ' + name:
            raise Exception('Wrong format in file ' + cfg_file)

        t_vals = []
        for k in range(6, T + 6):
            floats = list(map(float, corr_lines[k].split()))
            # the last two columns are the real and imaginary part
            t_vals.append(floats[-2:][im])
        return_vals[key] = t_vals
    return return_vals


def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
    """Read all needed correlators for every config file of one replicum.

    Returns
    -------
    return_vals : dict[str, list[numpy.ndarray]]
        For every key a list with T arrays of length len(sub_ls), indexed
        as return_vals[key][t][cfg].
    """
    rep_path = path + '/' + rep + '/'
    no_cfg = len(sub_ls)

    return_vals = {}
    for key in needed_keys:
        name = _key2specs(key)[0]
        return_vals[key] = [np.zeros(no_cfg) for _ in range(intern[name]["T"])]

    for cfg, cfg_file in enumerate(sub_ls):
        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
        for key in needed_keys:
            name = _key2specs(key)[0]
            for t in range(intern[name]["T"]):
                return_vals[key][t][cfg] = cfg_data[key][t]
    return return_vals


def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
    """Read the configuration number and the correlator data of one run.

    Parameters
    ----------
    chunk : list[str]
        Lines of one run of an appended file.
    gauge_line : int
        Index of the line holding the gauge configuration name.
    cfg_sep : str
        String that separates the ensemble identifier from the configuration number.
    start_read : int
        Index of the first data line inside the chunk.
    T : int
        Number of data lines.
    corr_line : int
        Index of the '[correlator]' line inside the chunk.
    b2b : bool
        True for boundary-to-boundary correlators.
    pattern : str
        Regular expression identifying the correlator header.
    im : int
        0 to read the real part, 1 to read the imaginary part.
    single : bool
        True for correlators with a single entry per line ('bb').

    Returns
    -------
    idl : int
        Configuration number of the run.
    data : list[float]
        The T values read; empty if the header does not match the pattern.
    """
    try:
        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
    except Exception as err:
        raise Exception("Couldn't parse idl from directory, problem with chunk around line " + str(gauge_line)) from err

    found_pat = "".join(chunk[corr_line + 1:corr_line + 6 + b2b])
    data = []
    if re.search(pattern, found_pat):
        for line in chunk[start_read:start_read + T]:
            floats = list(map(float, line.split()))
            data.append(floats[im + 1 - single])
    return idl, data


def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
    """Read one correlator for all runs stored in an appended file.

    The layout (gauge name line, correlator line, first data line and T) is
    determined from the first run; all other runs are assumed to follow the
    same structure.

    Parameters
    ----------
    filename : str
        Path of the appended file.
    pattern : str
        Regular expression identifying the correlator header.
    b2b : bool
        True for boundary-to-boundary correlators.
    cfg_separator : str
        String that separates the ensemble identifier from the configuration number.
    im : int
        0 to read the real part, 1 to read the imaginary part.
    single : bool
        True for correlators with a single entry per line ('bb').

    Returns
    -------
    T : int
        Number of data lines per run.
    rep_idl : list[int]
        Configuration numbers of all runs.
    data : list[list[float]]
        Data indexed as data[t][run].

    Raises
    ------
    ValueError
        If the file contains no run, no gauge name line or no correlator
        matching the pattern.
    Exception
        If the runs do not all have the same length.
    """
    with open(filename, 'r') as fp:
        content = fp.readlines()

    data_starts = [linenumber for linenumber, line in enumerate(content) if "[run]" in line]
    if not data_starts:
        raise ValueError("Did not find any '[run]' marker in " + filename)
    run_lengths = {later - earlier for earlier, later in zip(data_starts, data_starts[1:])}
    if len(run_lengths) > 1:
        raise Exception("Irregularities in file structure found, not all runs have the same output length")

    # With a single run the chunk simply extends to the end of the file.
    chunk_len = data_starts[1] if len(data_starts) > 1 else len(content)
    chunk = content[:chunk_len]

    gauge_line = None
    corr_line = None
    start_read = None
    for linenumber, line in enumerate(chunk):
        if line.startswith("gauge_name"):
            gauge_line = linenumber
        elif line.startswith("[correlator]"):
            corr_line = linenumber
            found_pat = "".join(chunk[corr_line + 1: corr_line + 6 + b2b])
            if re.search(pattern, found_pat):
                start_read = corr_line + 7 + b2b
                break
    if start_read is None:
        raise ValueError("Did not find pattern\n" + pattern + "\nin\n" + filename)
    if gauge_line is None:
        raise ValueError("Did not find a 'gauge_name' line in front of the correlator in " + filename)

    # The data block ends at the first empty line (or at the end of the run).
    endline = corr_line + 6 + b2b
    while endline < len(chunk) and chunk[endline] != "\n":
        endline += 1
    T = endline - start_read

    # all other chunks should follow the same structure
    rep_idl = []
    rep_data = []
    for start in data_starts:
        chunk = content[start:start + chunk_len]
        idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
        rep_idl.append(idl)
        rep_data.append(data)

    # transpose from [run][t] to [t][run]
    data = [[run_data[t] for run_data in rep_data] for t in range(T)]
    return T, rep_idl, data


def _get_rep_names(ls, ens_name=None):
    """Build replica names of the form '<ensemble>|<replicum>' from folder names.

    The replicum part starts at the first 'r' of each entry. If `ens_name`
    is given it replaces the ensemble part.

    Raises
    ------
    Exception
        If an entry does not contain an 'r'.
    """
    new_names = []
    for entry in ls:
        try:
            idx = entry.index('r')
        except ValueError as err:
            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") from err

        if ens_name:
            new_names.append(ens_name + '|' + entry[idx:])
        else:
            new_names.append(entry[:idx] + '|' + entry[idx:])
    return new_names


def _get_appended_rep_names(ls, prefix, name, ens_name=None):
    """Build replica names of the form '<ensemble>|<replicum>' from appended file names.

    Only files matching prefix*.name are considered; the '.name' suffix is
    removed before the replicum part (starting at the first 'r') is split
    off. If `ens_name` is given it replaces the ensemble part. The list
    passed in is not modified.

    Raises
    ------
    Exception
        If a file name does not contain an 'r'.
    """
    matching = [entry for entry in ls if fnmatch.fnmatch(entry, prefix + '*.' + name)]
    matching.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))

    new_names = []
    for entry in matching:
        myentry = entry[:-len(name) - 1]
        try:
            idx = myentry.index('r')
        except ValueError as err:
            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") from err

        if ens_name:
            new_names.append(ens_name + '|' + myentry[idx:])
        else:
            new_names.append(myentry[:idx] + '|' + myentry[idx:])
    return new_names