mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-05-15 20:13:41 +02:00
implemented idl into sfcf-read method
This commit is contained in:
parent
5e5a9df404
commit
c5292f8342
2 changed files with 105 additions and 167 deletions
|
@ -6,125 +6,41 @@ import fnmatch
|
||||||
import re
|
import re
|
||||||
import numpy as np # Thinly-wrapped numpy
|
import numpy as np # Thinly-wrapped numpy
|
||||||
from ..obs import Obs
|
from ..obs import Obs
|
||||||
|
from . import utils
|
||||||
|
|
||||||
def read_sfcf_old(path, prefix, name, quarks, noffset = 0, wf=0, wf2=0, **kwargs):
|
|
||||||
"""Read sfcf format (from around 2012) from given folder structure.
|
|
||||||
|
|
||||||
Keyword arguments
|
|
||||||
-----------------
|
|
||||||
im -- if True, read imaginary instead of real part of the correlation function.
|
|
||||||
single -- if True, read a boundary-to-boundary correlation function with a single value
|
|
||||||
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
|
|
||||||
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
|
|
||||||
"""
|
|
||||||
if kwargs.get('im'):
|
|
||||||
im = 1
|
|
||||||
part = 'imaginary'
|
|
||||||
else:
|
|
||||||
im = 0
|
|
||||||
part = 'real'
|
|
||||||
|
|
||||||
b2b = 0
|
|
||||||
|
|
||||||
if kwargs.get('b2b'):
|
|
||||||
b2b = 1
|
|
||||||
|
|
||||||
quarks = quarks.split(" ")
|
|
||||||
read = 0
|
|
||||||
T = 0
|
|
||||||
start = 0
|
|
||||||
ls = []
|
|
||||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
|
||||||
ls.extend(dirnames)
|
|
||||||
break
|
|
||||||
if not ls:
|
|
||||||
print('Error, directory not found')
|
|
||||||
#sys.exit()
|
|
||||||
for exc in ls:
|
|
||||||
if fnmatch.fnmatch(exc, prefix + '*'):
|
|
||||||
ls = list(set(ls) - set(exc))
|
|
||||||
if len(ls) > 1:
|
|
||||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
|
|
||||||
replica = len(ls)
|
|
||||||
print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
|
|
||||||
if 'names' in kwargs:
|
|
||||||
new_names = kwargs.get('names')
|
|
||||||
if len(new_names) != replica:
|
|
||||||
raise Exception('Names does not have the required length', replica)
|
|
||||||
else:
|
|
||||||
new_names = ls
|
|
||||||
print(replica, 'replica')
|
|
||||||
for i, item in enumerate(ls):
|
|
||||||
print(item)
|
|
||||||
sub_ls = []
|
|
||||||
for (dirpath, dirnames, filenames) in os.walk(path+'/'+item):
|
|
||||||
sub_ls.extend(dirnames)
|
|
||||||
break
|
|
||||||
for exc in sub_ls:
|
|
||||||
if fnmatch.fnmatch(exc, 'cfg*'):
|
|
||||||
sub_ls = list(set(sub_ls) - set(exc))
|
|
||||||
sub_ls.sort(key=lambda x: int(x[3:]))
|
|
||||||
no_cfg = len(sub_ls)
|
|
||||||
print(no_cfg, 'configurations')
|
|
||||||
if i == 0:
|
|
||||||
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
|
|
||||||
for k, line in enumerate(fp):
|
|
||||||
#check if this is really the right file
|
|
||||||
pattern = "# "+name+" : offset "+str(noffset)+", wf "+"0"
|
|
||||||
#if b2b, a second wf is needed
|
|
||||||
if b2b:
|
|
||||||
pattern+=", wf_2 "+"0"
|
|
||||||
pattern+=" : "+quarks[0]+" - "+quarks[1]
|
|
||||||
|
|
||||||
if read == 1 and not line.strip() and k > start + 1:
|
|
||||||
break
|
|
||||||
if read == 1 and k >= start:
|
|
||||||
T += 1
|
|
||||||
if pattern in line:
|
|
||||||
#print(line)
|
|
||||||
read = 1
|
|
||||||
start = k+1
|
|
||||||
print(str(T)+" entries found.")
|
|
||||||
|
|
||||||
deltas = []
|
|
||||||
for j in range(T):
|
|
||||||
deltas.append([])
|
|
||||||
|
|
||||||
sublength = len(sub_ls)
|
|
||||||
for j in range(T):
|
|
||||||
deltas[j].append(np.zeros(sublength))
|
|
||||||
|
|
||||||
for cnfg, subitem in enumerate(sub_ls):
|
|
||||||
with open(path + '/' + item + '/' + subitem + '/'+name) as fp:
|
|
||||||
for k, line in enumerate(fp):
|
|
||||||
if(k >= start and k < start + T):
|
|
||||||
floats = list(map(float, line.split()))
|
|
||||||
deltas[k-start][i][cnfg] = floats[im]
|
|
||||||
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for t in range(T):
|
|
||||||
result.append(Obs(deltas[t], new_names))
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
|
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
|
||||||
"""Read sfcf c format from given folder structure.
|
"""Read sfcf c format from given folder structure.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
quarks -- Label of the quarks used in the sfcf input file
|
quarks: str
|
||||||
noffset -- Offset of the source (only relevant when wavefunctions are used)
|
Label of the quarks used in the sfcf input file. e.g. "quark quark"
|
||||||
wf -- ID of wave function
|
for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file,
|
||||||
wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
|
this is done automatically for this version
|
||||||
im -- if True, read imaginary instead of real part of the correlation function.
|
noffset: int
|
||||||
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
|
Offset of the source (only relevant when wavefunctions are used)
|
||||||
single -- if True, read time independent boundary to boundary correlation function
|
wf: int
|
||||||
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
|
ID of wave function
|
||||||
|
wf2: int
|
||||||
|
ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
|
||||||
|
im: bool
|
||||||
|
if True, read imaginary instead of real part of the correlation function.
|
||||||
|
b2b: bool
|
||||||
|
if True, read a time-dependent boundary-to-boundary correlation function
|
||||||
|
single: bool
|
||||||
|
if True, read time independent boundary to boundary correlation function
|
||||||
|
names: list
|
||||||
|
Alternative labeling for replicas/ensembles. Has to have the appropriate length
|
||||||
ens_name : str
|
ens_name : str
|
||||||
replaces the name of the ensemble
|
replaces the name of the ensemble
|
||||||
|
version: str
|
||||||
|
version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a c to the version (e.g. "1.0c")
|
||||||
|
replica: list
|
||||||
|
list of replica to be read, default is all
|
||||||
|
files: list
|
||||||
|
list of files to be read per replica, default is all. For the non-compact output format, hand the folders to be read here.
|
||||||
|
check_configs:
|
||||||
|
list of lists of expected configs, e.g. [range(1, 1001)] for one replicum with 1000 configs
|
||||||
"""
|
"""
|
||||||
if kwargs.get('im'):
|
if kwargs.get('im'):
|
||||||
im = 1
|
im = 1
|
||||||
|
@ -142,8 +58,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
else:
|
else:
|
||||||
b2b = 0
|
b2b = 0
|
||||||
single = 0
|
single = 0
|
||||||
|
if "replica" in kwargs:
|
||||||
files = []
|
reps = kwargs.get("replica")
|
||||||
if "files" in kwargs:
|
if "files" in kwargs:
|
||||||
files = kwargs.get("files")
|
files = kwargs.get("files")
|
||||||
|
|
||||||
|
@ -172,8 +88,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
if not ls:
|
if not ls:
|
||||||
raise Exception('Error, directory not found')
|
raise Exception('Error, directory not found')
|
||||||
# Exclude folders with different names
|
# Exclude folders with different names
|
||||||
if len(files) != 0:
|
if "replica" in kwargs:
|
||||||
ls = files
|
ls = reps
|
||||||
else:
|
else:
|
||||||
for exc in ls:
|
for exc in ls:
|
||||||
if not fnmatch.fnmatch(exc, prefix + '*'):
|
if not fnmatch.fnmatch(exc, prefix + '*'):
|
||||||
|
@ -182,9 +98,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
|
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
|
||||||
replica = len(ls)
|
replica = len(ls)
|
||||||
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
|
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
|
||||||
|
idl = []
|
||||||
if 'names' in kwargs:
|
if 'names' in kwargs:
|
||||||
new_names = kwargs.get('names')
|
new_names = kwargs.get('names')
|
||||||
|
if len(new_names)!=len(set(new_names)):
|
||||||
|
raise Exception("names are nor unique!")
|
||||||
if len(new_names) != replica:
|
if len(new_names) != replica:
|
||||||
raise Exception('Names does not have the required length', replica)
|
raise Exception('Names does not have the required length', replica)
|
||||||
else:
|
else:
|
||||||
|
@ -194,13 +112,18 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
try:
|
try:
|
||||||
idx = entry.index('r')
|
idx = entry.index('r')
|
||||||
except:
|
except:
|
||||||
idx = len(entry)-2
|
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
|
||||||
|
|
||||||
if 'ens_name' in kwargs:
|
if 'ens_name' in kwargs:
|
||||||
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
|
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
|
||||||
else:
|
else:
|
||||||
new_names.append(entry[:idx] + '|' + entry[idx:])
|
new_names.append(entry[:idx] + '|' + entry[idx:])
|
||||||
for i, item in enumerate(ls):
|
for i, item in enumerate(ls):
|
||||||
sub_ls = []
|
sub_ls = []
|
||||||
|
if "files" in kwargs:
|
||||||
|
sub_ls = kwargs.get("files")
|
||||||
|
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
|
||||||
|
else:
|
||||||
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
|
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
|
||||||
if compact:
|
if compact:
|
||||||
sub_ls.extend(filenames)
|
sub_ls.extend(filenames)
|
||||||
|
@ -218,35 +141,36 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
if not fnmatch.fnmatch(exc, 'cfg*'):
|
if not fnmatch.fnmatch(exc, 'cfg*'):
|
||||||
sub_ls = list(set(sub_ls) - set([exc]))
|
sub_ls = list(set(sub_ls) - set([exc]))
|
||||||
sub_ls.sort(key=lambda x: int(x[3:]))
|
sub_ls.sort(key=lambda x: int(x[3:]))
|
||||||
|
#print(sub_ls)
|
||||||
if compact:
|
rep_idl = []
|
||||||
first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
|
|
||||||
|
|
||||||
last_cfg = len(sub_ls) + first_cfg - 1
|
|
||||||
|
|
||||||
for cfg in range(1, len(sub_ls)):
|
|
||||||
if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
|
|
||||||
last_cfg = cfg + first_cfg - 1
|
|
||||||
break
|
|
||||||
|
|
||||||
no_cfg = last_cfg - first_cfg + 1
|
|
||||||
print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
|
|
||||||
else:
|
|
||||||
no_cfg = len(sub_ls)
|
no_cfg = len(sub_ls)
|
||||||
print(no_cfg, 'configurations')
|
for cfg in sub_ls:
|
||||||
|
try:
|
||||||
|
if compact:
|
||||||
|
rep_idl.append(int(cfg.split("n")[-1]))
|
||||||
|
else:
|
||||||
|
rep_idl.append(int(cfg[3:]))
|
||||||
|
except:
|
||||||
|
raise Exception("Couldn't parse idl from directroy, problem with file "+cfg)
|
||||||
|
rep_idl.sort()
|
||||||
|
#maybe there is a better way to print the idls
|
||||||
|
print(item, ':', no_cfg, ' configurations')
|
||||||
|
idl.append(rep_idl)
|
||||||
#here we have found all the files we need to look into.
|
#here we have found all the files we need to look into.
|
||||||
if i == 0:
|
if i == 0:
|
||||||
if compact:
|
#here, we want to find the place within the file, where the correlator we need is stored.
|
||||||
|
|
||||||
|
if compact:
|
||||||
|
#to do so, the pattern needed is put together from the input values
|
||||||
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
|
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
|
||||||
if b2b:
|
if b2b:
|
||||||
pattern += '\nwf_2 ' + str(wf2)
|
pattern += '\nwf_2 ' + str(wf2)
|
||||||
|
#and the file is parsed through to find the pattern
|
||||||
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
|
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
|
||||||
content = file.read()
|
content = file.read()
|
||||||
match = re.search(pattern, content)
|
match = re.search(pattern, content)
|
||||||
if match:
|
if match:
|
||||||
|
#the start and end point of the correlator in question are extracted for later use in the other files
|
||||||
start_read = content.count('\n', 0, match.start()) + 5 + b2b
|
start_read = content.count('\n', 0, match.start()) + 5 + b2b
|
||||||
end_match = re.search(r'\n\s*\n', content[match.start():])
|
end_match = re.search(r'\n\s*\n', content[match.start():])
|
||||||
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
|
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
|
||||||
|
@ -255,11 +179,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
else:
|
else:
|
||||||
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
|
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
|
||||||
else:
|
else:
|
||||||
#print(path + '/' + item + '/')# + sub_ls[0] + '/' + name)
|
#this part does the same as above, but for non-compactified versions of the files
|
||||||
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
|
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
|
||||||
for k, line in enumerate(fp):
|
for k, line in enumerate(fp):
|
||||||
if version == "0.0":
|
if version == "0.0":
|
||||||
#check if this is really the right file
|
#check if this is really the right file by matching a pattern similar to the one above
|
||||||
pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
|
pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
|
||||||
#if b2b, a second wf is needed
|
#if b2b, a second wf is needed
|
||||||
if b2b:
|
if b2b:
|
||||||
|
@ -284,6 +208,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
T -= b2b
|
T -= b2b
|
||||||
print(str(T)+" entries found.")
|
print(str(T)+" entries found.")
|
||||||
#we found where the correlator that is to be read is in the files
|
#we found where the correlator that is to be read is in the files
|
||||||
|
#after preparing the datastructure the correlators get parsed into...
|
||||||
deltas = []
|
deltas = []
|
||||||
for j in range(T):
|
for j in range(T):
|
||||||
deltas.append([])
|
deltas.append([])
|
||||||
|
@ -291,12 +216,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
sublength = no_cfg
|
sublength = no_cfg
|
||||||
for j in range(T):
|
for j in range(T):
|
||||||
deltas[j].append(np.zeros(sublength))
|
deltas[j].append(np.zeros(sublength))
|
||||||
|
#... the actual parsing can start. we iterate through all measurement files in the path given...
|
||||||
if compact:
|
if compact:
|
||||||
for cfg in range(no_cfg):
|
for cfg in range(no_cfg):
|
||||||
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
|
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
|
||||||
lines = fp.readlines()
|
lines = fp.readlines()
|
||||||
|
#check, if the correlator is in fact printed completely
|
||||||
if(start_read + T>len(lines)):
|
if(start_read + T>len(lines)):
|
||||||
raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
|
raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
|
||||||
|
#and start to read the correlator.
|
||||||
|
#the range here is chosen like this, since this allows for implementing a security check for every read correlator later...
|
||||||
for k in range(start_read - 6,start_read + T):
|
for k in range(start_read - 6,start_read + T):
|
||||||
if k == start_read - 5 - b2b:
|
if k == start_read - 5 - b2b:
|
||||||
if lines[k].strip() != 'name ' + name:
|
if lines[k].strip() != 'name ' + name:
|
||||||
|
@ -307,6 +236,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
else:
|
else:
|
||||||
for cnfg, subitem in enumerate(sub_ls):
|
for cnfg, subitem in enumerate(sub_ls):
|
||||||
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
|
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
|
||||||
|
#since the non-compact files are typically not very long, we can iterate over the whole file.
|
||||||
|
#here one can also implement the check from above.
|
||||||
for k, line in enumerate(fp):
|
for k, line in enumerate(fp):
|
||||||
if(k >= start and k < start + T):
|
if(k >= start and k < start + T):
|
||||||
floats = list(map(float, line.split()))
|
floats = list(map(float, line.split()))
|
||||||
|
@ -315,9 +246,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
|
||||||
else:
|
else:
|
||||||
deltas[k - start][i][cnfg] = floats[1 + im - single]
|
deltas[k - start][i][cnfg] = floats[1 + im - single]
|
||||||
|
|
||||||
|
if "check_configs" in kwargs:
|
||||||
|
print("Chekcing for missing configs...")
|
||||||
|
che = kwargs.get("check_configs")
|
||||||
|
if not (len(che) == len(idl)):
|
||||||
|
raise Exception("check_configs has to be the same length as replica!")
|
||||||
|
for r in range(len(idl)):
|
||||||
|
print("checking "+new_names[r])
|
||||||
|
utils.check_idl(idl[r], che[r])
|
||||||
|
print("Done")
|
||||||
result = []
|
result = []
|
||||||
for t in range(T):
|
for t in range(T):
|
||||||
result.append(Obs(deltas[t], new_names))
|
result.append(Obs(deltas[t], new_names, idl = idl))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
@ -1,14 +1,13 @@
|
||||||
import fnmatch
|
"""Utilities for the input"""
|
||||||
|
|
||||||
def check_missing(idl,che):
|
def check_idl(idl,che):
|
||||||
missing = []
|
missing = []
|
||||||
for ind in che:
|
for c in che:
|
||||||
if not ind in idl:
|
if not c in idl:
|
||||||
missing.append(ind)
|
missing.append(c)
|
||||||
if(len(missing) == 0):
|
#print missing such that it can directly be parsed to slurm terminal
|
||||||
print("There are no measurements missing.")
|
if not (len(missing) == 0):
|
||||||
else:
|
print(len(missing),"configs missing")
|
||||||
print(len(missing),"measurements missing")
|
|
||||||
miss_str = str(missing[0])
|
miss_str = str(missing[0])
|
||||||
for i in missing[1:]:
|
for i in missing[1:]:
|
||||||
miss_str += ","+str(i)
|
miss_str += ","+str(i)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue