implemented read_sfcf for append-mode output, bug fixes

This commit is contained in:
jkuhl-uni 2022-01-03 21:34:04 +01:00
parent 31c2ada963
commit ebdc17aa66

View file

@ -8,7 +8,7 @@ import numpy as np # Thinly-wrapped numpy
from ..obs import Obs
from . import utils
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0", **kwargs):
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0c", **kwargs):
"""Read sfcf c format from given folder structure.
Parameters
@ -65,186 +65,283 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
#due to higher usage in current projects, compact file format is default
compact = True
appended = False
#get version string
known_versions = ["0.0","1.0","2.0","1.0c","2.0c","1.0a","2.0a"]
if "version" in kwargs:
version = kwargs.get("version")
if not version in known_versions:
raise Exception("This version is not known!")
#if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c)
if(version[-1] == "c"):
compact = True
version = version[:-1]
else:
compact = False
if not version in known_versions:
raise Exception("This version is not known!")
#if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c)
if(version[-1] == "c"):
appended = False
compact = True
version = version[:-1]
elif(version[-1] == "a"):
appended = True
compact = False
version = version[:-1]
else:
compact = False
appended = False
read = 0
T = 0
start = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(dirnames)
break
if not ls:
raise Exception('Error, directory not found')
# Exclude folders with different names
if "replica" in kwargs:
ls = reps
else:
for (dirpath, dirnames, filenames) in os.walk(path):
if not appended:
ls.extend(dirnames)
else:
ls.extend(filenames)
break
if not ls:
raise Exception('Error, directory not found')
# Exclude folders with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
replica = len(ls)
if not appended:
replica = len(ls)
else:
replica = len([l.split(".")[-1] for l in ls])//len(set([l.split(".")[-1] for l in ls]))
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
idl = []
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names)!=len(set(new_names)):
raise Exception("names are nor unique!")
raise Exception("names are not unique!")
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
# Adjust replica names to new bookmarking system
new_names = []
for entry in ls:
try:
idx = entry.index('r')
except:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
else:
new_names.append(entry[:idx] + '|' + entry[idx:])
for i, item in enumerate(ls):
sub_ls = []
if "files" in kwargs:
sub_ls = kwargs.get("files")
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
if not appended:
for entry in ls:
try:
idx = entry.index('r')
except:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
else:
new_names.append(entry[:idx] + '|' + entry[idx:])
else:
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
if compact:
sub_ls.extend(filenames)
else:
sub_ls.extend(dirnames)
break
#print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
else:
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
#print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
try:
if compact:
rep_idl.append(int(cfg.split("n")[-1]))
else:
rep_idl.append(int(cfg[3:]))
except:
raise Exception("Couldn't parse idl from directroy, problem with file "+cfg)
rep_idl.sort()
#maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
#here we have found all the files we need to look into.
if i == 0:
#here, we want to find the place within the file, where the correlator we need is stored.
if compact:
#to do so, the pattern needed is put together from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
#and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
#the start and end point of the correlator in quaetion is extracted for later use in the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
else:
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.'+name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
for entry in ls:
myentry = entry.removesuffix("."+name)
try:
idx = myentry.index('r')
except:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
else:
new_names.append(myentry[:idx] + '|' + myentry[idx:])
#print(new_names)
idl = []
if not appended:
for i, item in enumerate(ls):
sub_ls = []
if "files" in kwargs:
sub_ls = kwargs.get("files")
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
else:
#this part does the same as above, but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if version == "0.0":
#check if this is really the right file by matchin pattern similar to above
pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
#if b2b, a second wf is needed
if b2b:
pattern+=", wf_2 "+str(wf2)
qs = quarks.split(" ")
pattern+=" : "+qs[0]+" - "+qs[1]
#print(pattern)
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if version == "0.0":
if pattern in line:
#print(line)
read = 1
start = k+1
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
if compact:
sub_ls.extend(filenames)
else:
sub_ls.extend(dirnames)
break
#print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
else:
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
#print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
try:
if compact:
rep_idl.append(int(cfg.split("n")[-1]))
else:
rep_idl.append(int(cfg[3:]))
except:
raise Exception("Couldn't parse idl from directroy, problem with file "+cfg)
rep_idl.sort()
#maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
#here we have found all the files we need to look into.
if i == 0:
#here, we want to find the place within the file, where the correlator we need is stored.
if compact:
#to do so, the pattern needed is put together from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
#and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
#the start and end point of the correlator in quaetion is extracted for later use in the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
else:
if '[correlator]' in line:
read = 1
start = k + 7 + b2b
T -= b2b
print(str(T)+" entries found.")
#we found where the correlator that is to be read is in the files
#after preparing the datastructure the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
sublength = no_cfg
for j in range(T):
deltas[j].append(np.zeros(sublength))
#... the actual parsing can start. we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
#check, if the correlator is in fact printed completely
if(start_read + T>len(lines)):
raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
#and start to read the correlator.
#the range here is chosen like this, since this allows for implementing a security check for every read correlator later...
for k in range(start_read - 6,start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
raise Exception('Wrong format', sub_ls[cfg])
if(k >= start_read and k < start_read + T):
floats = list(map(float, lines[k].split()))
deltas[k - start_read][i][cfg] = floats[-2:][im]
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
#since the non-compatified files are typically not so long, we can iterate over the whole file.
#here one can also implement the chekc from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
else:
#this part does the same as above, but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if version == "0.0":
deltas[k-start][i][cnfg] = floats[im]
else:
deltas[k - start][i][cnfg] = floats[1 + im - single]
#check if this is really the right file by matchin pattern similar to above
pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
#if b2b, a second wf is needed
if b2b:
pattern+=", wf_2 "+str(wf2)
qs = quarks.split(" ")
pattern+=" : "+qs[0]+" - "+qs[1]
#print(pattern)
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if version == "0.0":
if pattern in line:
#print(line)
read = 1
start = k+1
else:
if '[correlator]' in line:
read = 1
start = k + 7 + b2b
T -= b2b
print(str(T)+" entries found.")
#we found where the correlator that is to be read is in the files
#after preparing the datastructure the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(no_cfg))
#... the actual parsing can start. we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
#check, if the correlator is in fact printed completely
if(start_read + T>len(lines)):
raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
#and start to read the correlator.
#the range here is chosen like this, since this allows for implementing a security check for every read correlator later...
for k in range(start_read - 6,start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
raise Exception('Wrong format', sub_ls[cfg])
if(k >= start_read and k < start_read + T):
floats = list(map(float, lines[k].split()))
deltas[k - start_read][i][cfg] = floats[-2:][im]
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
#since the non-compatified files are typically not so long, we can iterate over the whole file.
#here one can also implement the chekc from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
if version == "0.0":
deltas[k-start][i][cnfg] = floats[im]
else:
deltas[k - start][i][cnfg] = floats[1 + im - single]
else:
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.'+name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
#print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
for rep,file in enumerate(ls):
rep_idl = []
with open(path + '/' + file, 'r') as fp:
content = fp.readlines()
data_starts = []
for l,line in enumerate(content):
if "[run]" in line:
data_starts.append(l)
if len(set([data_starts[i]-data_starts[i-1] for i in range(1,len(data_starts))])) > 1:
raise Exception ("Irregularities in file structure found, not all runs have the same output length")
#print(data_starts)
#first chunk of data
chunk = content[:data_starts[1]]
for l,line in enumerate(chunk):
if line.startswith("gauge_name"):
gauge_line = l
#meta_data["gauge_name"] = (line.strip()).split("/")[-1]
elif line.startswith("[correlator]"):
corr_line = l
found_pat = ""
for li in chunk[corr_line+1:corr_line+6+b2b]:
found_pat += li
if re.search(pattern,found_pat):
start_read = corr_line+7+b2b
T=len(chunk)-1-start_read
if rep == 0:
deltas = []
for t in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(len(data_starts)))
#all other chunks should follow the same structure
for cnfg in range(len(data_starts)):
start = data_starts[cnfg]
stop = start+data_starts[1]
chunk = content[start:stop]
#meta_data = {}
try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except:
raise Exception("Couldn't parse idl from directroy, problem with chunk around line "+gauge_line)
found_pat = ""
for li in chunk[corr_line+1:corr_line+6+b2b]:
found_pat += li
if re.search(pattern,found_pat):
#print("found pattern")
for t,line in enumerate(chunk[start_read:start_read+T]):
floats = list(map(float, line.split()))
deltas[t][rep][cnfg] = floats[-2:][im]
idl.append(rep_idl)
#print(new_names)
#print(deltas)
#print(idl)
if "check_configs" in kwargs:
print("Checking for missing configs...")
che = kwargs.get("check_configs")