Minor bug fixes; major bug fix in append-mode read-in

This commit is contained in:
jkuhl-uni 2022-02-07 15:11:55 +01:00
parent b36d559ac3
commit ecfaa8ed67

View file

@ -118,11 +118,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
for exc in ls: for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'): if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc])) ls = list(set(ls) - set([exc]))
if len(ls) > 1:
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if not appended: if not appended:
if len(ls) > 1:
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls) replica = len(ls)
else: else:
replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls])) replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
@ -213,58 +213,39 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if i == 0: if i == 0:
# here, we want to find the place within the file, # here, we want to find the place within the file,
# where the correlator we need is stored. # where the correlator we need is stored.
if compact: # to do so, the pattern needed is put together
# to do so, the pattern needed is put together # from the input values
# from the input values if version == "0.0":
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) for k, line in enumerate(file):
if b2b:
pattern += '\nwf_2 ' + str(wf2)
# and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
# the start and end point of the correlator
# in quaetion is extracted for later use in
# the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
else:
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
else:
# this part does the same as above,
# but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if version == "0.0":
# check if this is really the right file
# by matching pattern similar to above
pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
# if b2b, a second wf is needed
if b2b:
pattern += ", wf_2 " + str(wf2)
qs = quarks.split(" ")
pattern += " : " + qs[0] + " - " + qs[1]
# print(pattern)
if read == 1 and not line.strip() and k > start + 1: if read == 1 and not line.strip() and k > start + 1:
break break
if read == 1 and k >= start: if read == 1 and k >= start:
T += 1 T += 1
if pattern in line:
read = 1
start = k + 1
print(str(T) + " entries found.")
else:
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
# and the file is parsed through to find the pattern
if compact:
file = open(path + '/' + item + '/' + sub_ls[0], "r")
else:
# for non-compactified versions of the files
file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
if version == "0.0": content = file.read()
if pattern in line: match = re.search(pattern, content)
# print(line) if match:
read = 1 start_read = content.count('\n', 0, match.start()) + 5 + b2b
start = k + 1 end_match = re.search(r'\n\s*\n', content[match.start():])
else: T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
if '[correlator]' in line: assert T > 0
read = 1 print(T, 'entries, starting to read in line', start_read)
start = k + 7 + b2b else:
T -= b2b raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
print(str(T) + " entries found.")
# we found where the correlator # we found where the correlator
# that is to be read is in the files # that is to be read is in the files
# after preparing the datastructure # after preparing the datastructure
@ -292,8 +273,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
for k in range(start_read - 6, start_read + T): for k in range(start_read - 6, start_read + T):
if k == start_read - 5 - b2b: if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name: if lines[k].strip() != 'name ' + name:
raise Exception('Wrong format', raise Exception('Wrong format', sub_ls[cfg])
sub_ls[cfg])
if(k >= start_read and k < start_read + T): if(k >= start_read and k < start_read + T):
floats = list(map(float, lines[k].split())) floats = list(map(float, lines[k].split()))
deltas[k - start_read][i][cfg] = floats[-2:][im] deltas[k - start_read][i][cfg] = floats[-2:][im]
@ -305,12 +285,12 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
# we can iterate over the whole file. # we can iterate over the whole file.
# here one can also implement the chekc from above. # here one can also implement the chekc from above.
for k, line in enumerate(fp): for k, line in enumerate(fp):
if(k >= start and k < start + T): if(k >= start_read and k < start_read + T):
floats = list(map(float, line.split())) floats = list(map(float, line.split()))
if version == "0.0": if version == "0.0":
deltas[k - start][i][cnfg] = floats[im] deltas[k - start][i][cnfg] = floats[im - single]
else: else:
deltas[k - start][i][cnfg] = floats[1 + im - single] deltas[k - start_read][i][cnfg] = floats[1 + im - single]
else: else:
if "files" in kwargs: if "files" in kwargs:
@ -320,7 +300,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, prefix + '*.' + name): if not fnmatch.fnmatch(exc, prefix + '*.' + name):
ls = list(set(ls) - set([exc])) ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
# print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b: if b2b:
pattern += '\nwf_2 ' + str(wf2) pattern += '\nwf_2 ' + str(wf2)
@ -332,8 +311,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
for linenumber, line in enumerate(content): for linenumber, line in enumerate(content):
if "[run]" in line: if "[run]" in line:
data_starts.append(linenumber) data_starts.append(linenumber)
if len(set([data_starts[i] - data_starts[i - 1] for i in if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
range(1, len(data_starts))])) > 1:
raise Exception("Irregularities in file structure found, not all runs have the same output length") raise Exception("Irregularities in file structure found, not all runs have the same output length")
# first chunk of data # first chunk of data
chunk = content[:data_starts[1]] chunk = content[:data_starts[1]]
@ -343,11 +321,15 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
elif line.startswith("[correlator]"): elif line.startswith("[correlator]"):
corr_line = linenumber corr_line = linenumber
found_pat = "" found_pat = ""
for li in chunk[corr_line + 1:corr_line + 6 + b2b]: for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
found_pat += li found_pat += li
if re.search(pattern, found_pat): if re.search(pattern, found_pat):
start_read = corr_line + 7 + b2b start_read = corr_line + 7 + b2b
T = len(chunk) - 1 - start_read break
endline = corr_line + 6 + b2b
while not chunk[endline] == "\n":
endline += 1
T = endline - start_read
if rep == 0: if rep == 0:
deltas = [] deltas = []
for t in range(T): for t in range(T):
@ -363,7 +345,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
try: try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1])) rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except Exception: except Exception:
raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line) raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
found_pat = "" found_pat = ""
for li in chunk[corr_line + 1:corr_line + 6 + b2b]: for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
@ -371,7 +353,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if re.search(pattern, found_pat): if re.search(pattern, found_pat):
for t, line in enumerate(chunk[start_read:start_read + T]): for t, line in enumerate(chunk[start_read:start_read + T]):
floats = list(map(float, line.split())) floats = list(map(float, line.split()))
deltas[t][rep][cnfg] = floats[-2:][im] deltas[t][rep][cnfg] = floats[im + 1 - single]
idl.append(rep_idl) idl.append(rep_idl)
if "check_configs" in kwargs: if "check_configs" in kwargs: