refactor: unnecessary comments in input/sfcf removed, typos corrected

Fabian Joswig 2022-01-16 16:50:51 +01:00
parent b0bc9c8e47
commit f8cbaef626


@@ -43,15 +43,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
replaces the name of the ensemble
version: str
version of SFCF, with which the measurement was done.
if the compact output option (-c) was spectified,
if the compact output option (-c) was specified,
append a "c" to the version (e.g. "1.0c")
if the append output option (-a) was specified,
append an "a" to the version
append an "a" to the version. Currently supported versions
are "0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a" and "2.0a".
replica: list
list of replica to be read, default is all
files: list
list of files to be read per replica, default is all.
for non-conpact ouztput format, hand the folders to be read here.
for non-compact output format, hand the folders to be read here.
check_configs:
list of list of supposed configs, e.g. [range(1,1000)]
for one replicum with 1000 configs
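
For context, a call using these documented keywords might look like the following sketch; the path, prefix, correlator name and all values are placeholders, not taken from the commit:

import pyerrors.input.sfcf as sfcf

# all values below are hypothetical, chosen only to exercise the documented kwargs
corr = sfcf.read_sfcf("/data/sfcf_runs", "myrun", "f_A",
                      quarks="lquark lquark", noffset=0, wf=0,
                      version="2.0c",                 # compact output of SFCF 2.0
                      replica=[0, 1],                 # read only these replica
                      check_configs=[range(1, 1001), range(1, 1001)],
                      ens_name="A654")
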
@@ -77,17 +78,12 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if "replica" in kwargs:
reps = kwargs.get("replica")
# due to higher usage in current projects,
# compact file format is default
compact = True
appended = False
# get version string
known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
if version not in known_versions:
raise Exception("This version is not known!")
# if the letter c is appended to the version,
# the compact fileformat is used (former read_sfcf_c)
if(version[-1] == "c"):
appended = False
compact = True
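
Only the "c" branch of the suffix dispatch is visible in this hunk; as a standalone sketch, with the "a" and bare-version branches inferred from the docstring rather than shown in the diff:

version = "2.0a"   # hypothetical input
compact = True     # defaults, as above
appended = False
if version[-1] == "c":      # "1.0c", "2.0c": one compact file per replica
    compact, appended = True, False
elif version[-1] == "a":    # "1.0a", "2.0a": appended output format (inferred)
    compact, appended = False, True
else:                       # bare "0.0", "1.0", "2.0": one file per config (inferred)
    compact, appended = False, False
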
@@ -119,7 +115,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if not appended:
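
The sort key pulls the first integer after the prefix, giving numeric rather than lexicographic replica order, e.g. with made-up directory names:

import re

prefix = "data"
ls = ["data_r10", "data_r2", "data_r1"]
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
print(ls)  # ['data_r1', 'data_r2', 'data_r10'], not the lexicographic ['data_r1', 'data_r10', 'data_r2']
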
@@ -135,8 +130,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
# Adjust replica names to new bookmarking system
new_names = []
if not appended:
for entry in ls:
@@ -157,7 +150,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
for entry in ls:
myentry = entry[:-len(name) - 1]
# print(myentry)
try:
idx = myentry.index('r')
except Exception:
@@ -167,7 +159,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
else:
new_names.append(myentry[:idx] + '|' + myentry[idx:])
# print(new_names)
idl = []
if not appended:
for i, item in enumerate(ls):
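
For illustration, with a hypothetical file "data_r1.f_A" and name "f_A", the replica tag after the first "r" ends up behind the "|" separator:

entry, name = "data_r1.f_A", "f_A"
myentry = entry[:-len(name) - 1]            # "data_r1"
idx = myentry.index('r')                    # first 'r' marks the replica tag
print(myentry[:idx] + '|' + myentry[idx:])  # "data_|r1"
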
@@ -183,7 +174,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
sub_ls.extend(dirnames)
break
# print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
@@ -194,7 +184,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
# print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
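
Here int(x[3:]) strips the literal "cfg" prefix, again enforcing numeric order:

sub_ls = ["cfg12", "cfg3", "cfg100"]
sub_ls.sort(key=lambda x: int(x[3:]))
print(sub_ls)  # ['cfg3', 'cfg12', 'cfg100']
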
@@ -206,26 +195,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
except Exception:
raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
rep_idl.sort()
# maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
# here we have found all the files we need to look into.
if i == 0:
# here, we want to find the place within the file,
# where the correlator we need is stored.
if compact:
# to do so, the pattern needed is put together
# from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
# and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
# the start and end point of the correlator
# in quaetion is extracted for later use in
# in question is extracted for later use in
# the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
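
For concrete inputs (the correlator name and quark content below are invented), the assembled pattern is simply the literal header block, and start_read points just past it:

import re

name, quarks, noffset, wf = "f_A", "lquark lquark", 0, 0
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
content = "name f_A\nquarks lquark lquark\noffset 0\nwf 0\n1 2 +6.5e-01\n\n"
match = re.search(pattern, content)
start_read = content.count('\n', 0, match.start()) + 5  # + b2b for boundary-to-boundary correlators
print(start_read)  # 5: the data begins five lines below the header
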
@@ -248,7 +230,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
pattern += ", wf_2 " + str(wf2)
qs = quarks.split(" ")
pattern += " : " + qs[0] + " - " + qs[1]
# print(pattern)
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
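
In the appended format the quark content is matched in "first - second" form, e.g. for hypothetical quarks "lquark lquark":

quarks = "lquark lquark"
qs = quarks.split(" ")
print(" : " + qs[0] + " - " + qs[1])  # " : lquark - lquark"
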
@@ -265,30 +246,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
start = k + 7 + b2b
T -= b2b
print(str(T) + " entries found.")
# we found where the correlator
# that is to be read is in the files
# after preparing the datastructure
# the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(no_cfg))
# ...the actual parsing can start.
# we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
# check, if the correlator is in fact
# printed completely
if(start_read + T > len(lines)):
raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
# and start to read the correlator.
# the range here is chosen like this,
# since this allows for implementing
# a security check for every read correlator later...
for k in range(start_read - 6, start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
@@ -300,10 +270,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
# since the non-compatified files
# are typically not so long,
# we can iterate over the whole file.
# here one can also implement the chekc from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
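
Each line inside the window [start, start + T) is simply split into floats; e.g. for an invented data line (the exact column layout depends on the SFCF version):

line = "  1   2   +6.5471188868e-01   -2.1906194174e-03"
floats = list(map(float, line.split()))
print(floats)  # [1.0, 2.0, 0.65471188868, -0.0021906194174]
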
@@ -320,7 +286,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
# print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
@@ -335,7 +300,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if len(set([data_starts[i] - data_starts[i - 1] for i in
range(1, len(data_starts))])) > 1:
raise Exception("Irregularities in file structure found, not all runs have the same output length")
# first chunk of data
chunk = content[:data_starts[1]]
for linenumber, line in enumerate(chunk):
if line.startswith("gauge_name"):
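
The regularity check only demands that consecutive header offsets are equally spaced, i.e. that every per-configuration chunk has the same length:

data_starts = [0, 40, 80, 120]  # hypothetical header positions
gaps = set(data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts)))
print(len(gaps) <= 1)  # True: uniform 40-line chunks; anything else raises above
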
@@ -354,12 +318,10 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(len(data_starts)))
# all other chunks should follow the same structure
for cnfg in range(len(data_starts)):
start = data_starts[cnfg]
stop = start + data_starts[1]
chunk = content[start:stop]
# meta_data = {}
try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except Exception:
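
The configuration number is recovered by splitting the gauge_name line at its last "n"; e.g. for an invented gauge_name, assuming the usual trailing "nNNN" convention:

gauge_line_content = "gauge_name /lat/ensembleA/cfgs/run n36"
print(int(gauge_line_content.split("n")[-1]))  # 36
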