flake8 compliance

jkuhl-uni 2022-01-14 16:00:40 +01:00
parent 53f727092d
commit 302a7ae439


@@ -8,39 +8,55 @@ import numpy as np  # Thinly-wrapped numpy
from ..obs import Obs
from . import utils


def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
              version="1.0c", **kwargs):
    """Read sfcf c format from given folder structure.

    Parameters
    ----------
    quarks: str
        Label of the quarks used in the sfcf input file, e.g. "quark quark".
        For version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file;
        this is done automatically for this version.
    noffset: int
        Offset of the source (only relevant when wavefunctions are used)
    wf: int
        ID of wave function
    wf2: int
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions)
    im: bool
        if True, read imaginary instead of real part
        of the correlation function.
    b2b: bool
        if True, read a time-dependent boundary-to-boundary
        correlation function
    single: bool
        if True, read a time-independent boundary-to-boundary
        correlation function
    names: list
        Alternative labeling for replicas/ensembles.
        Has to have the appropriate length.
    ens_name : str
        replaces the name of the ensemble
    version: str
        version of SFCF with which the measurement was done.
        If the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c");
        if the append output option (-a) was specified,
        append an "a" to the version.
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all.
        For the non-compact output format, hand the folders to be read here.
    check_configs:
        list of lists of expected configs, e.g. [range(1, 1001)]
        for one replicum with 1000 configs

    TODO:
    - what's going on with files here?
    """
    if kwargs.get('im'):
        im = 1
@@ -63,15 +79,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
    if "files" in kwargs:
        files = kwargs.get("files")
    # due to higher usage in current projects,
    # compact file format is default
    compact = True
    appended = False
    # get version string
    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
    if version not in known_versions:
        raise Exception("This version is not known!")
    # if the letter c is appended to the version,
    # the compact file format is used (former read_sfcf_c)
    if(version[-1] == "c"):
        appended = False
        compact = True
@@ -103,15 +121,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
            if not fnmatch.fnmatch(exc, prefix + '*'):
                ls = list(set(ls) - set([exc]))
        if len(ls) > 1:
            # New version, to cope with ids, etc.
            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    if not appended:
        replica = len(ls)
    else:
        replica = len([file.split(".")[-1] for file in ls])\
            // len(set([file.split(".")[-1] for file in ls]))
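        # e.g. (hypothetical file names, not from this commit): for
        # ["run_r0.f_A", "run_r1.f_A"] the suffix set is {"f_A"}, so
        # replica = 2 // 1 = 2 appended files belong to each correlator.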
    print('Read', part, 'part of', name, 'from', prefix[:-1],
          ',', replica, 'replica')
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != len(set(new_names)):
            raise Exception("names are not unique!")
        if len(new_names) != replica:
            raise Exception('names does not have the required length', replica)
@@ -123,11 +145,13 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
            for entry in ls:
                try:
                    idx = entry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, \
                    please enter the key word 'names'.")
                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' +
                                     entry[idx:])
                else:
                    new_names.append(entry[:idx] + '|' + entry[idx:])
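                # e.g. a folder "A654r003" (hypothetical) is split at the
                # first 'r' into the ensemble tag "A654" and the replicum
                # tag "r003", giving the name "A654|r003".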
        else:
@@ -138,17 +162,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            for entry in ls:
                myentry = entry[:-len(name)-1]
                # print(myentry)
                try:
                    idx = myentry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, \
                    please enter the key word 'names'.")
                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' +
                                     myentry[idx:])
                else:
                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
    # print(new_names)
    idl = []
    if not appended:
        for i, item in enumerate(ls):
@@ -157,24 +183,26 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
                sub_ls = kwargs.get("files")
                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            else:
                for (dirpath, dirnames, filenames) in \
                        os.walk(path + '/' + item):
                    if compact:
                        sub_ls.extend(filenames)
                    else:
                        sub_ls.extend(dirnames)
                    break
            # print(sub_ls)
            for exc in sub_ls:
                if compact:
                    if not fnmatch.fnmatch(exc, prefix + '*'):
                        sub_ls = list(set(sub_ls) - set([exc]))
                    sub_ls.sort(key=lambda x:
                                int(re.findall(r'\d+', x)[-1]))
                else:
                    if not fnmatch.fnmatch(exc, 'cfg*'):
                        sub_ls = list(set(sub_ls) - set([exc]))
                    sub_ls.sort(key=lambda x: int(x[3:]))
            # print(sub_ls)
            rep_idl = []
            no_cfg = len(sub_ls)
            for cfg in sub_ls:
@@ -183,54 +211,73 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
                        rep_idl.append(int(cfg.split("n")[-1]))
                    else:
                        rep_idl.append(int(cfg[3:]))
                except Exception:
                    raise Exception("Couldn't parse idl from directory, \
                    problem with file "+cfg)
            rep_idl.sort()
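            # e.g. a compact file name "sfcf_r0n25" (hypothetical) yields
            # configuration number 25 via cfg.split("n")[-1]; a non-compact
            # folder "cfg25" yields it via cfg[3:].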
            # maybe there is a better way to print the idls
            print(item, ':', no_cfg, ' configurations')
            idl.append(rep_idl)
            # here we have found all the files we need to look into.
            if i == 0:
                # here, we want to find the place within the file,
                # where the correlator we need is stored.
                if compact:
                    # to do so, the pattern needed is put together
                    # from the input values
                    pattern = 'name ' + name + '\nquarks '\
                        + quarks + '\noffset '\
                        + str(noffset) + '\nwf '\
                        + str(wf)
                    if b2b:
                        pattern += '\nwf_2 ' + str(wf2)
                    # and the file is parsed through to find the pattern
                    with open(path + '/' + item + '/' + sub_ls[0], 'r') \
                            as file:
                        content = file.read()
                    match = re.search(pattern, content)
                    if match:
                        # the start and end point of the correlator
                        # in question is extracted for later use in
                        # the other files
                        start_read = content.count('\n', 0, match.start())\
                            + 5 + b2b
                        end_match = re.search(r'\n\s*\n',
                                              content[match.start():])
                        T = content[match.start():]\
                            .count('\n', 0, end_match.start()) - 4 - b2b
                        assert T > 0
                        print(T, 'entries, starting to read in line',
                              start_read)
                    else:
                        raise Exception('Correlator with pattern\n'
                                        + pattern + '\nnot found.')
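                    # Sketch of the compact header this pattern matches
                    # (hypothetical values, not from this commit):
                    #
                    #     name f_A
                    #     quarks lquark lquark
                    #     offset 0
                    #     wf 0
                    #
                    # start_read then points past the header lines to the
                    # first data line, and T counts the data lines up to
                    # the next blank line.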
                else:
                    # this part does the same as above,
                    # but for non-compactified versions of the files
                    with open(path + '/' + item + '/' + sub_ls[0] + '/'
                              + name) as fp:
                        for k, line in enumerate(fp):
                            if version == "0.0":
                                # check if this is really the right file
                                # by matching a pattern similar to above
                                pattern = "# "+name+" : offset "+str(noffset)\
                                    + ", wf "+str(wf)
                                # if b2b, a second wf is needed
                                if b2b:
                                    pattern += ", wf_2 "+str(wf2)
                                qs = quarks.split(" ")
                                pattern += " : " + qs[0]+" - " + qs[1]
                                # print(pattern)
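                                # e.g. for name "f_A", noffset 0, wf 0 and
                                # quarks "lquark lquark" (hypothetical),
                                # the header searched for reads:
                                # "# f_A : offset 0, wf 0 : lquark - lquark"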
                            if read == 1 and not line.strip() \
                                    and k > start + 1:
                                break
                            if read == 1 and k >= start:
                                T += 1
                            if version == "0.0":
                                if pattern in line:
                                    # print(line)
                                    read = 1
                                    start = k+1
                            else:
@@ -239,121 +286,133 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
                                    start = k + 7 + b2b
                                    T -= b2b
                    print(str(T)+" entries found.")
                # we found where the correlator
                # that is to be read is in the files
                # after preparing the datastructure
                # the correlators get parsed into...
                deltas = []
                for j in range(T):
                    deltas.append([])
                for t in range(T):
                    deltas[t].append(np.zeros(no_cfg))
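                # deltas thus has shape [T][replica][configs]: one list per
                # timeslice, holding one np.array of measurements per
                # replicum.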
            # ...the actual parsing can start.
            # we iterate through all measurement files in the path given...
            if compact:
                for cfg in range(no_cfg):
                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                        lines = fp.readlines()
                        # check if the correlator is in fact
                        # printed completely
                        if(start_read + T > len(lines)):
                            raise Exception("EOF before end of correlator data! \
                            Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" \
                            is corrupted?")
                        # and start to read the correlator.
                        # the range here is chosen like this,
                        # since this allows for implementing
                        # a security check for every read correlator later...
                        for k in range(start_read - 6, start_read + T):
                            if k == start_read - 5 - b2b:
                                if lines[k].strip() != 'name ' + name:
                                    raise Exception('Wrong format',
                                                    sub_ls[cfg])
                            if(k >= start_read and k < start_read + T):
                                floats = list(map(float, lines[k].split()))
                                deltas[k - start_read][i][cfg] = \
                                    floats[-2:][im]
            else:
                for cnfg, subitem in enumerate(sub_ls):
                    with open(path + '/' + item + '/' + subitem
                              + '/' + name) as fp:
                        # since the non-compactified files
                        # are typically not so long,
                        # we can iterate over the whole file.
                        # here one can also implement the check from above.
                        for k, line in enumerate(fp):
                            if(k >= start and k < start + T):
                                floats = list(map(float, line.split()))
                                if version == "0.0":
                                    deltas[k-start][i][cnfg] = floats[im]
                                else:
                                    deltas[k - start][i][cnfg] = \
                                        floats[1 + im - single]
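                                    # the data lines in these versions
                                    # apparently carry a leading column
                                    # before the real/imaginary values, so
                                    # 1 + im - single picks the right one
                                    # (single correlators have no leading
                                    # column); version 0.0 lines start
                                    # directly with the values.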
    else:
        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*.'+name):
                ls = list(set(ls) - set([exc]))
        ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
        # print(ls)
        pattern = 'name ' + name + '\nquarks '\
            + quarks + '\noffset ' + str(noffset)\
            + '\nwf ' + str(wf)
        if b2b:
            pattern += '\nwf_2 ' + str(wf2)
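        # Sketch of the appended-format layout assumed below (hypothetical
        # values, not from this commit): the file is a sequence of equally
        # long chunks, each starting with a "[run]" line, containing a
        # "gauge_name" line such as "gauge_name data_r0n25" from which the
        # config number (here 25) is parsed, and a "[correlator]" section
        # whose header lines are matched against pattern.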
        for rep, file in enumerate(ls):
            rep_idl = []
            with open(path + '/' + file, 'r') as fp:
                content = fp.readlines()
                data_starts = []
                for linenumber, line in enumerate(content):
                    if "[run]" in line:
                        data_starts.append(linenumber)
                if len(set([data_starts[i]-data_starts[i-1] for i in
                            range(1, len(data_starts))])) > 1:
                    raise Exception("Irregularities in file structure found,\
                    not all runs have the same output length")
                # first chunk of data
                chunk = content[:data_starts[1]]
                for linenumber, line in enumerate(chunk):
                    if line.startswith("gauge_name"):
                        gauge_line = linenumber
                    elif line.startswith("[correlator]"):
                        corr_line = linenumber
                        found_pat = ""
                        for li in chunk[corr_line+1:corr_line+6+b2b]:
                            found_pat += li
                        if re.search(pattern, found_pat):
                            start_read = corr_line+7+b2b
                            T = len(chunk)-1-start_read
                            if rep == 0:
                                deltas = []
                                for t in range(T):
                                    deltas.append([])
                            for t in range(T):
                                deltas[t].append(np.zeros(len(data_starts)))
                # all other chunks should follow the same structure
                for cnfg in range(len(data_starts)):
                    start = data_starts[cnfg]
                    stop = start+data_starts[1]
                    chunk = content[start:stop]
                    # meta_data = {}
                    try:
                        rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
                    except Exception:
                        raise Exception("Couldn't parse idl from directory, \
                        problem with chunk around line "+str(gauge_line))
                    found_pat = ""
                    for li in chunk[corr_line+1:corr_line+6+b2b]:
                        found_pat += li
                    if re.search(pattern, found_pat):
                        for t, line in \
                                enumerate(chunk[start_read:start_read+T]):
                            floats = list(map(float, line.split()))
                            deltas[t][rep][cnfg] = floats[-2:][im]
            idl.append(rep_idl)
if "check_configs" in kwargs: if "check_configs" in kwargs:
print("Checking for missing configs...") print("Checking for missing configs...")
che = kwargs.get("check_configs") che = kwargs.get("check_configs")
if not (len(che) == len(idl)): if not (len(che) == len(idl)):
raise Exception("check_configs has to be the same length as replica!") raise Exception("check_configs has to be the same length\
as replica!")
for r in range(len(idl)): for r in range(len(idl)):
print("checking "+new_names[r]) print("checking "+new_names[r])
utils.check_idl(idl[r], che[r]) utils.check_idl(idl[r], che[r])
print("Done") print("Done")
result = [] result = []
for t in range(T): for t in range(T):
result.append(Obs(deltas[t], new_names, idl = idl)) result.append(Obs(deltas[t], new_names, idl=idl))
return result return result
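# A hypothetical call with the consistency check enabled (path, names and
# ranges are placeholders, not from this commit): one expected-config list
# is passed per replicum and compared against the parsed idls.
#
#     read_sfcf("./data", "sfcf", "f_A", version="2.0c",
#               check_configs=[range(1, 1001)])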