pyerrors.input.sfcf
```python
#!/usr/bin/env python
# coding: utf-8

import os
import fnmatch
import re
import numpy as np  # Thinly-wrapped numpy
from ..obs import Obs
from . import utils


def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version="1.0c", **kwargs):
    """Read sfcf c format from given folder structure.

    Parameters
    ----------
    quarks: str
        Label of the quarks used in the sfcf input file, e.g. "quark quark".
        For version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file, this is done automatically for this version.
    noffset: int
        Offset of the source (only relevant when wavefunctions are used).
    wf: int
        ID of wave function.
    wf2: int
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions).
    im: bool
        if True, read imaginary instead of real part of the correlation function.
    b2b: bool
        if True, read a time-dependent boundary-to-boundary correlation function.
    single: bool
        if True, read a time-independent boundary-to-boundary correlation function.
    names: list
        Alternative labeling for replicas/ensembles. Has to have the appropriate length.
    ens_name: str
        replaces the name of the ensemble.
    version: str
        version of SFCF with which the measurement was done.
        If the compact output option (-c) was specified, append a "c" to the
        version, e.g. "1.0c"; if the append output option (-a) was specified,
        append an "a". Currently supported versions are "0.0", "1.0", "2.0",
        "1.0c", "2.0c", "1.0a" and "2.0a".
    replica: list
        list of replica to be read, default is all.
    files: list
        list of files to be read per replica, default is all.
        For non-compact output format, hand the folders to be read here.
    check_configs: list
        list of lists of expected configs, e.g. [range(1, 1001)]
        for one replicum with 1000 configs.

    TODO:
    - what's going on with files here?
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'
    if kwargs.get('single'):
        b2b = 1
        single = 1
    else:
        if kwargs.get('b2b'):
            b2b = 1
        else:
            b2b = 0
        single = 0
    if "replica" in kwargs:
        reps = kwargs.get("replica")

    compact = True
    appended = False
    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]

    if version not in known_versions:
        raise Exception("This version is not known!")
    # a trailing "c"/"a" in the version string selects compact/appended output
    if version[-1] == "c":
        appended = False
        compact = True
        version = version[:-1]
    elif version[-1] == "a":
        appended = True
        compact = False
        version = version[:-1]
    else:
        compact = False
        appended = False
    read = 0
    T = 0
    start = 0
    ls = []
    if "replica" in kwargs:
        ls = reps
    else:
        for (dirpath, dirnames, filenames) in os.walk(path):
            if not appended:
                ls.extend(dirnames)
            else:
                ls.extend(filenames)
            break
        if not ls:
            raise Exception('Error, directory not found')
        # Exclude folders with different names
        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*'):
                ls = list(set(ls) - set([exc]))
        if len(ls) > 1:
            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    if not appended:
        replica = len(ls)
    else:
        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
    print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != len(set(new_names)):
            raise Exception("names are not unique!")
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
        new_names = []
        if not appended:
            for entry in ls:
                try:
                    idx = entry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
                else:
                    new_names.append(entry[:idx] + '|' + entry[idx:])
        else:
            for exc in ls:
                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                    ls = list(set(ls) - set([exc]))
            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            for entry in ls:
                myentry = entry[:-len(name) - 1]
                try:
                    idx = myentry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
                else:
                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
    idl = []
    if not appended:
        # non-appended formats: loop over one folder per replicum
        for i, item in enumerate(ls):
            sub_ls = []
            if "files" in kwargs:
                sub_ls = kwargs.get("files")
                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            else:
                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
                    if compact:
                        sub_ls.extend(filenames)
                    else:
                        sub_ls.extend(dirnames)
                    break
                for exc in sub_ls:
                    if compact:
                        if not fnmatch.fnmatch(exc, prefix + '*'):
                            sub_ls = list(set(sub_ls) - set([exc]))
                        sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
                    else:
                        if not fnmatch.fnmatch(exc, 'cfg*'):
                            sub_ls = list(set(sub_ls) - set([exc]))
                        sub_ls.sort(key=lambda x: int(x[3:]))
            rep_idl = []
            no_cfg = len(sub_ls)
            for cfg in sub_ls:
                try:
                    if compact:
                        rep_idl.append(int(cfg.split("n")[-1]))
                    else:
                        rep_idl.append(int(cfg[3:]))
                except Exception:
                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
            rep_idl.sort()
            print(item, ':', no_cfg, ' configurations')
            idl.append(rep_idl)
            if i == 0:
                if compact:
                    pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
                    if b2b:
                        pattern += '\nwf_2 ' + str(wf2)
                    with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
                        content = file.read()
                        match = re.search(pattern, content)
                        if match:
                            # the start and end point of the correlator
                            # in question is extracted for later use in
                            # the other files
                            start_read = content.count('\n', 0, match.start()) + 5 + b2b
                            end_match = re.search(r'\n\s*\n', content[match.start():])
                            T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
                            assert T > 0
                            print(T, 'entries, starting to read in line', start_read)
                        else:
                            raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
                else:
                    # this part does the same as above,
                    # but for non-compactified versions of the files
                    with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
                        for k, line in enumerate(fp):
                            if version == "0.0":
                                # check if this is really the right file
                                # by matching pattern similar to above
                                pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
                                # if b2b, a second wf is needed
                                if b2b:
                                    pattern += ", wf_2 " + str(wf2)
                                qs = quarks.split(" ")
                                pattern += " : " + qs[0] + " - " + qs[1]
                            if read == 1 and not line.strip() and k > start + 1:
                                break
                            if read == 1 and k >= start:
                                T += 1
                            if version == "0.0":
                                if pattern in line:
                                    read = 1
                                    start = k + 1
                            else:
                                if '[correlator]' in line:
                                    read = 1
                                    start = k + 7 + b2b
                                    T -= b2b
                    print(str(T) + " entries found.")
                deltas = []
                for j in range(T):
                    deltas.append([])
            for t in range(T):
                deltas[t].append(np.zeros(no_cfg))
            # we iterate through all measurement files in the path given...
            if compact:
                for cfg in range(no_cfg):
                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                        lines = fp.readlines()
                        if start_read + T > len(lines):
                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
                        for k in range(start_read - 6, start_read + T):
                            if k == start_read - 5 - b2b:
                                if lines[k].strip() != 'name ' + name:
                                    raise Exception('Wrong format', sub_ls[cfg])
                            if start_read <= k < start_read + T:
                                floats = list(map(float, lines[k].split()))
                                deltas[k - start_read][i][cfg] = floats[-2:][im]
            else:
                for cnfg, subitem in enumerate(sub_ls):
                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
                        for k, line in enumerate(fp):
                            if start <= k < start + T:
                                floats = list(map(float, line.split()))
                                if version == "0.0":
                                    deltas[k - start][i][cnfg] = floats[im]
                                else:
                                    deltas[k - start][i][cnfg] = floats[1 + im - single]
    else:
        # appended format: all configurations of a replicum are in a single file
        if "files" in kwargs:
            ls = kwargs.get("files")
        else:
            for exc in ls:
                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                    ls = list(set(ls) - set([exc]))
            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
        pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
        if b2b:
            pattern += '\nwf_2 ' + str(wf2)
        for rep, file in enumerate(ls):
            rep_idl = []
            with open(path + '/' + file, 'r') as fp:
                content = fp.readlines()
                data_starts = []
                for linenumber, line in enumerate(content):
                    if "[run]" in line:
                        data_starts.append(linenumber)
                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
                chunk = content[:data_starts[1]]
                for linenumber, line in enumerate(chunk):
                    if line.startswith("gauge_name"):
                        gauge_line = linenumber
                    elif line.startswith("[correlator]"):
                        corr_line = linenumber
                found_pat = ""
                for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
                    found_pat += li
                if re.search(pattern, found_pat):
                    start_read = corr_line + 7 + b2b
                    T = len(chunk) - 1 - start_read
                if rep == 0:
                    deltas = []
                    for t in range(T):
                        deltas.append([])
                for t in range(T):
                    deltas[t].append(np.zeros(len(data_starts)))
                for cnfg in range(len(data_starts)):
                    start = data_starts[cnfg]
                    stop = start + data_starts[1]
                    chunk = content[start:stop]
                    try:
                        rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
                    except Exception:
                        raise Exception("Couldn't parse idl from directory, problem with chunk around line " + str(gauge_line))
                    found_pat = ""
                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
                        found_pat += li
                    if re.search(pattern, found_pat):
                        for t, line in enumerate(chunk[start_read:start_read + T]):
                            floats = list(map(float, line.split()))
                            deltas[t][rep][cnfg] = floats[-2:][im]
            idl.append(rep_idl)
    if "check_configs" in kwargs:
        print("Checking for missing configs...")
        che = kwargs.get("check_configs")
        if len(che) != len(idl):
            raise Exception("check_configs has to be the same length as replica!")
        for r in range(len(idl)):
            print("checking " + new_names[r])
            utils.check_idl(idl[r], che[r])
        print("Done")
    result = []
    for t in range(T):
        result.append(Obs(deltas[t], new_names, idl=idl))
    return result
```
```python
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version='1.0c', **kwargs)
```
Read SFCF c format from the given folder structure.
Parameters
- quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; it is added automatically for this version.
- noffset (int): Offset of the source (only relevant when wavefunctions are used)
- wf (int): ID of wave function
- wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
- im (bool): if True, read the imaginary instead of the real part of the correlation function.
- b2b (bool): if True, read a time-dependent boundary-to-boundary correlation function.
- single (bool): if True, read a time-independent boundary-to-boundary correlation function.
- names (list): Alternative labeling for replicas/ensembles. Has to match the number of replicas.
- ens_name (str): replaces the name of the ensemble
- version (str): version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a". Currently supported versions are "0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a" and "2.0a".
- replica (list): list of replica to be read; default is all.
- files (list): list of files to be read per replica; default is all. For the non-compact output format, hand the folders to be read here.
- check_configs (list): list of lists of expected configs, e.g. [range(1, 1001)] for one replicum with 1000 configs; see the example call after this list.
- TODO: what's going on with files here?
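The following is a minimal, hypothetical usage sketch, not taken from the pyerrors documentation: the path, prefix, correlator name "f_A" and quark labels are placeholders; the keyword arguments and the return value (one Obs per timeslice) follow from the source above.

```python
from pyerrors.input.sfcf import read_sfcf

# Hypothetical layout: ./data contains replica folders sfcf_ens1_r0 and
# sfcf_ens1_r1, each holding one compact-format file per configuration
# ("1.0c" = SFCF version 1.0 with the -c output option).
corr = read_sfcf("./data", "sfcf_ens1_", "f_A",
                 quarks="lquark lquark",  # quark labels as in the sfcf input file
                 version="1.0c",
                 check_configs=[range(1, 1001),   # expected configs of replicum r0
                                range(1, 1001)])  # expected configs of replicum r1

print(len(corr))  # number of timeslices T; each corr[t] is an Obs
```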