From e9b59ec9ed359e65c6557fb55671f7f3e1e33754 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 27 Sep 2023 14:17:49 +0000 Subject: [PATCH] read_sfcf_multi running with compact format --- pyerrors/input/sfcf.py | 340 ++++++++++++++++++++++++++--------------- 1 file changed, 217 insertions(+), 123 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index a18c6a57..ce277167 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -3,6 +3,7 @@ import fnmatch import re import numpy as np # Thinly-wrapped numpy from ..obs import Obs +from ..correlators import Corr from .utils import sort_names, check_idl @@ -16,7 +17,71 @@ _corr_type_dict = { } -def read_sfcf(path, prefix, name, quarks='.*', corr_type = "bi" ,noffsets=0, wf1s=0, wf2s=0, version="1.0c", cfg_separator="n", silent=False, **kwargs): +def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf1=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs): + """Read sfcf files from given folder structure. + + Parameters + ---------- + path : str + Path to the sfcf files. + prefix : str + Prefix of the sfcf files. + name : str + Name of the correlation function to read. + quarks : str + Label of the quarks used in the sfcf input file. e.g. "quark quark" + for version 0.0 this does NOT need to be given with the typical " - " + that is present in the output file, + this is done automatically for this version + corr_type : str + Type of correlation function to read. Can be + - 'bi' for boundary-inner + - 'bb' for boundary-boundary + - 'bib' for boundary-inner-boundary + noffset : int + Offset of the source (only relevant when wavefunctions are used) + wf : int + ID of wave function + wf2 : int + ID of the second wavefunction + (only relevant for boundary-to-boundary correlation functions) + im : bool + if True, read imaginary instead of real part + of the correlation function. + names : list + Alternative labeling for replicas/ensembles. + Has to have the appropriate length + ens_name : str + replaces the name of the ensemble + version: str + version of SFCF, with which the measurement was done. + if the compact output option (-c) was specified, + append a "c" to the version (e.g. "1.0c") + if the append output option (-a) was specified, + append an "a" to the version + cfg_separator : str + String that separates the ensemble identifier from the configuration number (default 'n'). + replica: list + list of replica to be read, default is all + files: list + list of files to be read per replica, default is all. + for non-compact output format, hand the folders to be read here. + check_configs: list[list[int]] + list of list of supposed configs, eg. [range(1,1000)] + for one replicum with 1000 configs + + Returns + ------- + result: list[Obs] + list of Observables with length T, observable per timeslice. + bb-type correlators have length 1. + """ + return_dict = read_sfcf_multi(path, prefix, [name], quark_pairs=[quarks], corr_type=[corr_type], noffset_list=[noffset], wf1_list=[wf1], wf2_list=[wf2], version=version, cfg_separator=cfg_separator, silent=silent, **kwargs) + + return return_dict[name][quarks][str(noffset)][str(wf1)][str(wf2)] + + +def read_sfcf_multi(path, prefix, name_list, quark_pairs=['.*'], corr_type=['bi'], noffset_list=[0], wf1_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, **kwargs): """Read sfcf files from given folder structure. Parameters @@ -76,85 +141,25 @@ def read_sfcf(path, prefix, name, quarks='.*', corr_type = "bi" ,noffsets=0, wf1 bb-type correlators have length 1. """ - return read_sfcf_multi(path, prefix, [name], quark_pairs=[quarks], corr_type=[corr_type], noffset=[0], wf=[0], wf2=[0], version="1.0c", cfg_separator="n", silent=False, **kwargs) + # notes imaginary bool + # { + # name + # quarks + # offset + # wf + # if bb or bib + # wf2 + # im/non-im + # } + return_dict = {} - -def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs): - """Read sfcf files from given folder structure. - - Parameters - ---------- - path : str - Path to the sfcf files. - prefix : str - Prefix of the sfcf files. - name : str - Name of the correlation function to read. - quarks : str - Label of the quarks used in the sfcf input file. e.g. "quark quark" - for version 0.0 this does NOT need to be given with the typical " - " - that is present in the output file, - this is done automatically for this version - corr_type : str - Type of correlation function to read. Can be - - 'bi' for boundary-inner - - 'bb' for boundary-boundary - - 'bib' for boundary-inner-boundary - noffset : int - Offset of the source (only relevant when wavefunctions are used) - wf : int - ID of wave function - wf2 : int - ID of the second wavefunction - (only relevant for boundary-to-boundary correlation functions) - im : bool - if True, read imaginary instead of real part - of the correlation function. - names : list - Alternative labeling for replicas/ensembles. - Has to have the appropriate length - ens_name : str - replaces the name of the ensemble - version: str - version of SFCF, with which the measurement was done. - if the compact output option (-c) was specified, - append a "c" to the version (e.g. "1.0c") - if the append output option (-a) was specified, - append an "a" to the version - cfg_separator : str - String that separates the ensemble identifier from the configuration number (default 'n'). - replica: list - list of replica to be read, default is all - files: list - list of files to be read per replica, default is all. - for non-compact output format, hand the folders to be read here. - check_configs: list[list[int]] - list of list of supposed configs, eg. [range(1,1000)] - for one replicum with 1000 configs - - Returns - ------- - result: list[Obs] - list of Observables with length T, observable per timeslice. - bb-type correlators have length 1. - """ - if kwargs.get('im') is True: + if kwargs.get('im'): im = 1 part = 'imaginary' else: im = 0 part = 'real' - if corr_type == 'bb': - b2b = True - single = True - elif corr_type == 'bib': - b2b = True - single = False - else: - b2b = False - single = False - known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"] if version not in known_versions: @@ -193,8 +198,8 @@ def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], no else: replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls])) - if not silent: - print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') + # if not silent: + # print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') # change this if 'names' in kwargs: new_names = kwargs.get('names') @@ -213,6 +218,29 @@ def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], no idl = [] if not appended: + intern = {} + for name, c_type in zip(name_list, corr_type): + intern[name] = {} + if c_type == 'bb': + b2b = True + single = True + elif c_type == 'bib': + b2b = True + single = False + else: + b2b = False + single = False + intern[name]["b2b"] = b2b + intern[name]["spec"] = {} + for quarks in quark_pairs: + intern[name]["spec"][quarks] = {} + for off in noffset_list: + intern[name]["spec"][quarks][str(off)] = {} + for w in wf1_list: + intern[name]["spec"][quarks][str(off)][str(w)] = {} + for w2 in wf2_list: + intern[name]["spec"][quarks][str(off)][str(w)][str(w2)] = {} + for i, item in enumerate(ls): rep_path = path + '/' + item if "files" in kwargs: @@ -237,32 +265,44 @@ def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], no idl.append(rep_idl) # here we have found all the files we need to look into. if i == 0: - # here, we want to find the place within the file, - # where the correlator we need is stored. - # to do so, the pattern needed is put together - # from the input values - if version == "0.0": - file = path + '/' + item + '/' + sub_ls[0] + '/' + name - else: - if compact: - file = path + '/' + item + '/' + sub_ls[0] - else: - file = path + '/' + item + '/' + sub_ls[0] + '/' + name + for name in name_list: + return_dict[name] = {} + for quarks in quark_pairs: + return_dict[name][quarks] = {} + for off in noffset_list: + return_dict[name][quarks][str(off)] = {} + for w in wf1_list: + return_dict[name][quarks][str(off)][str(w)] = {} + for w2 in wf2_list: + return_dict[name][quarks][str(off)][str(w)][str(w2)] = {} + # here, we want to find the place within the file, + # where the correlator we need is stored. + # to do so, the pattern needed is put together + # from the input values + if version == "0.0": + file = path + '/' + item + '/' + sub_ls[0] + '/' + name + else: + if compact: + file = path + '/' + item + '/' + sub_ls[0] + else: + file = path + '/' + item + '/' + sub_ls[0] + '/' + name - pattern = _make_pattern(version, name, noffset, wf, wf2, b2b, quarks) - start_read, T = _find_correlator(file, version, pattern, b2b, silent=silent) - - # preparing the datastructure - # the correlators get parsed into... - deltas = [] - for j in range(T): - deltas.append([]) + intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"] = _make_pattern(version, name, off, w, w2, b2b, quarks) + start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], b2b, silent=silent) + intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read + intern[name]["T"] = T + # preparing the datastructure + # the correlators get parsed into... + deltas = [] + for j in range(T): + deltas.append([]) + return_dict[name][quarks][str(off)][str(w)][str(w2)] = deltas if compact: - rep_deltas = _read_compact_rep(path, item, sub_ls, start_read, T, b2b, name, im) + rep_deltas = _read_compact_rep(path, item, sub_ls, intern, im) for t in range(T): - deltas[t].append(rep_deltas[t]) + return_dict[name][quarks][str(off)][str(w)][str(w2)][t].append(rep_deltas[name][quarks][str(off)][str(w)][str(w2)][t]) else: for t in range(T): deltas[t].append(np.zeros(no_cfg)) @@ -277,6 +317,15 @@ def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], no deltas[k - start_read][i][cnfg] = floats[1 + im - single] else: + if corr_type == 'bb': + b2b = True + single = True + elif corr_type == 'bib': + b2b = True + single = False + else: + b2b = False + single = False if "files" in kwargs: ls = kwargs.get("files") else: @@ -309,10 +358,24 @@ def read_sfcf_multi(path, prefix, names, quark_pairs='.*', corr_type=['bi'], no check_idl(idl[r], che[r]) if not silent: print("Done") - result = [] - for t in range(T): - result.append(Obs(deltas[t], new_names, idl=idl)) - return result + + result_dict = {} + for name in name_list: + result_dict[name] = {} + for quarks in quark_pairs: + result_dict[name][quarks] = {} + for off in noffset_list: + result_dict[name][quarks][str(off)] = {} + for w in wf1_list: + result_dict[name][quarks][str(off)][str(w)] = {} + for w2 in wf2_list: + result_dict[name][quarks][str(off)][str(w)][str(w2)] = {} + result = [] + for t in range(intern[name]["T"]): + result.append(Obs(return_dict[name][quarks][str(off)][str(w)][str(w2)][t], new_names, idl=idl)) + result_dict[name][quarks][str(off)][str(w)][str(w2)] = result + print(result_dict) + return result_dict def _find_files(rep_path, prefix, compact, files=[]): @@ -382,38 +445,69 @@ def _find_correlator(file_name, version, pattern, b2b, silent=False): return start_read, T -def _read_compact_file(rep_path, config_file, start_read, T, b2b, name, im): - with open(rep_path + config_file) as fp: +def _read_compact_file(rep_path, cfg_file, intern, im): + return_vals = {} + with open(rep_path + cfg_file) as fp: lines = fp.readlines() - # check, if the correlator is in fact - # printed completely - if (start_read + T + 1 > len(lines)): - raise Exception("EOF before end of correlator data! Maybe " + rep_path + config_file + " is corrupted?") - corr_lines = lines[start_read - 6: start_read + T] - del lines - t_vals = [] + return_vals = {} + for name in intern.keys(): + return_vals[name] = {} + for quarks in intern[name]["spec"].keys(): + return_vals[name][quarks] = {} + for off in intern[name]["spec"][quarks].keys(): + return_vals[name][quarks][off] = {} + for w in intern[name]["spec"][quarks][off].keys(): + return_vals[name][quarks][off][w] = {} + for w2 in intern[name]["spec"][quarks][off][w].keys(): + T = intern[name]["T"] + start_read = intern[name]["spec"][quarks][off][w][w2]["start"] + # check, if the correlator is in fact + # printed completely + if (start_read + T + 1 > len(lines)): + raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?") + corr_lines = lines[start_read - 6: start_read + T] + t_vals = [] - if corr_lines[1 - b2b].strip() != 'name ' + name: - raise Exception('Wrong format in file', config_file) + if corr_lines[1 - intern[name]["b2b"]].strip() != 'name ' + name: + raise Exception('Wrong format in file', cfg_file) - for k in range(6, T + 6): - floats = list(map(float, corr_lines[k].split())) - t_vals.append(floats[-2:][im]) - return t_vals + for k in range(6, T + 6): + floats = list(map(float, corr_lines[k].split())) + t_vals.append(floats[-2:][im]) + return_vals[name][quarks][off][w][w2] = t_vals + return return_vals -def _read_compact_rep(path, rep, sub_ls, start_read, T, b2b, name, im): +def _read_compact_rep(path, rep, sub_ls, intern, im_list): rep_path = path + '/' + rep + '/' no_cfg = len(sub_ls) - deltas = [] - for t in range(T): - deltas.append(np.zeros(no_cfg)) + + return_vals = {} + for name in intern.keys(): + return_vals[name] = {} + for quarks in intern[name]["spec"].keys(): + return_vals[name][quarks] = {} + for off in intern[name]["spec"][quarks].keys(): + return_vals[name][quarks][off] = {} + for w in intern[name]["spec"][quarks][off].keys(): + return_vals[name][quarks][off][w] = {} + for w2 in intern[name]["spec"][quarks][off][w].keys(): + deltas = [] + for t in range(intern[name]["T"]): + deltas.append(np.zeros(no_cfg)) + return_vals[name][quarks][off][w][w2] = deltas for cfg in range(no_cfg): cfg_file = sub_ls[cfg] - cfg_data = _read_compact_file(rep_path, cfg_file, start_read, T, b2b, name, im) - for t in range(T): - deltas[t][cfg] = cfg_data[t] - return deltas + cfg_data = _read_compact_file(rep_path, cfg_file, intern, im_list) + print(cfg_data) + for name in intern.keys(): + for quarks in intern[name]["spec"].keys(): + for off in intern[name]["spec"][quarks].keys(): + for w in intern[name]["spec"][quarks][off].keys(): + for w2 in intern[name]["spec"][quarks][off][w].keys(): + for t in range(intern[name]["T"]): + return_vals[name][quarks][off][w][w2][t][cfg] = cfg_data[name][quarks][off][w][w2][t] + return return_vals def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):