Merge branch 'develop' into documentation

fjosw 2022-01-16 15:54:32 +00:00
commit 399547a957
2 changed files with 6 additions and 75 deletions


@@ -1,6 +1,3 @@
#!/usr/bin/env python
# coding: utf-8
import os
import fnmatch
import re
@@ -42,7 +39,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
if 'files' in kwargs:
ls = kwargs.get('files')
else:
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
ls = list(set(ls) - set([exc]))
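The filter above drops non-matching entries by set subtraction while the loop keeps iterating over the original list object. A minimal self-contained sketch of the same idiom (file names invented):

```python
import fnmatch

# Invented file names; in the real code ls comes from listing `path`.
ls = ['runA_r0ms1.dat', 'runA_r1ms1.dat', 'README.txt']
prefix, postfix = 'runA_', 'ms1'

# Rebinding ls inside the loop is safe: the for-loop still iterates
# over the original list object.
for exc in ls:
    if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
        ls = list(set(ls) - set([exc]))

print(sorted(ls))  # -> ['runA_r0ms1.dat', 'runA_r1ms1.dat']
```

Note that the set operations do not preserve order; an order-preserving equivalent would be a list comprehension over fnmatch.fnmatch.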
@@ -69,7 +65,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
print('Read reweighting factors from', prefix[:-1], ',',
replica, 'replica', end='')
# Adjust replica names to new bookmarking system
if names is None:
rep_names = []
for entry in ls:
@@ -88,7 +83,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
tmp_array = []
with open(path + '/' + ls[rep], 'rb') as fp:
# header
t = fp.read(4) # number of reweighting factors
if rep == 0:
nrw = struct.unpack('i', t)[0]
@@ -97,7 +91,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for k in range(nrw):
deltas.append([])
else:
# slightly awkward if-clause, needed because of the /2 operation for version 2.0.
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
raise Exception('Error: different number of reweighting factors for replicum', rep)
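The header parse reads a single 4-byte integer with struct; for version '2.0' the stored value is twice the number of reweighting factors, which is why the consistency check above compares against the unpacked value divided by 2. A toy round trip of that read (the value 3 is invented):

```python
import struct
from io import BytesIO

# Toy stand-in for the first 4 header bytes of a .ms1.dat file.
fp = BytesIO(struct.pack('i', 3))

t = fp.read(4)                  # raw header bytes, as in the code above
nrw = struct.unpack('i', t)[0]  # native-endian 4-byte int
print(nrw)                      # -> 3
```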
@@ -110,8 +103,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for i in range(nrw):
t = fp.read(4)
nfct.append(struct.unpack('i', t)[0])
# print('nfct: ', nfct) # Hasenbusch factor,
# 1 for rat reweighting
else:
for i in range(nrw):
nfct.append(1)
@@ -124,7 +115,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
if not struct.unpack('i', fp.read(4))[0] == 0:
print('something is wrong!')
# body
while 0 < 1:
t = fp.read(4)
if len(t) < 4:
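The body loop above runs until a short read signals end of file. A minimal self-contained sketch of that pattern (record values invented):

```python
import struct
from io import BytesIO

# Toy byte stream of three 4-byte records.
fp = BytesIO(struct.pack('iii', 10, 20, 30))

records = []
while True:            # same role as the 'while 0 < 1:' loop above
    t = fp.read(4)
    if len(t) < 4:     # a short read means end of file, leave the loop
        break
    records.append(struct.unpack('i', t)[0])

print(records)         # -> [10, 20, 30]
```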
@@ -220,7 +210,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
if not ls:
raise Exception('Error, directory not found')
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
ls = list(set(ls) - set([exc]))
@@ -232,7 +221,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
else:
r_start = [None] * replica
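The comprehension above shifts 1-based configuration numbers to 0-based Python indices while passing None through. A tiny sketch with invented offsets:

```python
# Invented 1-based start indices for three replica; None means "from the start".
r_start = [1, 500, None]
# Shift to 0-based indices, as in the line above; note that the truthiness
# test also maps an explicit 0 to None.
r_start = [o - 1 if o else None for o in r_start]
print(r_start)  # -> [0, 499, None]
```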
@@ -251,7 +239,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
for rep in range(replica):
with open(path + '/' + ls[rep], 'rb') as fp:
# Read header
t = fp.read(12)
header = struct.unpack('iii', t)
if rep == 0:
@@ -270,7 +257,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
Ysl = []
# Read body
while 0 < 1:
t = fp.read(4)
if(len(t) < 4):
@@ -334,12 +320,6 @@ def _parse_array_openQCD2(d, n, size, wa, quadrupel=False):
return arr
# mimic the read_array routine of openQCD-2.0.
# fp is the opened file handle
# returns the dict array
# at this point we only parse a 2d array
# d = 2
# n = [nfct[irw], 2*nsrc[irw]]
def _read_array_openQCD2(fp):
t = fp.read(4)
d = struct.unpack('i', t)[0]
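The hunk only shows the first step of _read_array_openQCD2. Going by the comments above (d dimensions, shape n = [nfct[irw], 2*nsrc[irw]]), a hedged sketch of the full layout might look like the following; the field order after d is inferred from those comments, not verified against the openQCD-2.0 sources, and the function name is invented:

```python
import struct
from io import BytesIO

def read_array_2d_sketch(fp):
    """Sketch only: layout (d, n[0..d-1], size, flat data) is an assumption."""
    d = struct.unpack('i', fp.read(4))[0]          # number of dimensions
    n = struct.unpack('%di' % d, fp.read(4 * d))   # shape, e.g. (nfct, 2*nsrc)
    size = struct.unpack('i', fp.read(4))[0]       # bytes per element (8 = double)
    count = 1
    for ni in n:
        count *= ni
    data = struct.unpack('%dd' % count, fp.read(size * count))
    return {'d': d, 'n': n, 'size': size, 'arr': data}

# Round trip with an invented 2x2 array of doubles:
buf = BytesIO(struct.pack('i', 2) + struct.pack('2i', 2, 2)
              + struct.pack('i', 8) + struct.pack('4d', 1.0, 2.0, 3.0, 4.0))
print(read_array_2d_sketch(buf))
```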
@@ -400,13 +380,11 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
last configurations that need to be read (per replicum)
files: list
specify the exact files that need to be read
from path, pratical if e.g. only one replicum is needed
from path, practical if e.g. only one replicum is needed
names: list
Alternative labeling for replicas/ensembles.
Has to have the appropriate length
"""
# one could read L from the header in case of sfQCD
# c = 0.35
known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
if version not in known_versions:
@@ -426,11 +404,9 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
r_start = kwargs.get("r_start")
if "r_stop" in kwargs:
r_stop = kwargs.get("r_stop")
# if one wants to read specific files with this method...
if "files" in kwargs:
files = kwargs.get("files")
else:
# find files in path
found = []
files = []
for (dirpath, dirnames, filenames) in os.walk(path + "/"):
@@ -441,14 +417,12 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
files.append(f)
print(files)
# now that we found our files, we dechiffer them...
rep_names = []
deltas = []
idl = []
for rep, file in enumerate(files):
with open(path + "/" + file, "rb") as fp:
# header
t = fp.read(12)
header = struct.unpack('<iii', t)
# step size in integration steps "dnms"
@@ -477,7 +451,6 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
Q = []
ncs = []
while 0 < 1:
# int nt
t = fp.read(4)
if(len(t) < 4):
break
@@ -524,8 +497,6 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
if len(Q_round) != len(ncs) // dtr_cnfg:
raise Exception("qtops and ncs dont have the same length")
# replica = len(files)
truncated_file = file[:-7]
print(truncated_file)
idl_start = 1
@@ -553,7 +524,6 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
rep_names = names
deltas.append(np.array(Q_round))
idl.append(range(idl_start, idl_stop))
# print(idl)
result = Obs(deltas, rep_names, idl=idl)
return result
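Grounded only in the signature and docstring shown above, a hypothetical call could look as follows; the path, prefix and keyword values are invented, and c = 0.35 merely echoes the removed comment further up:

```python
# Hypothetical usage of read_qtop as defined above; argument values invented.
qtop = read_qtop('./measurements', 'runA_', c=0.35,
                 dtr_cnfg=2, version='1.2',
                 r_start=[1], r_stop=[500])
print(qtop)  # an Obs holding the rounded topological charge per configuration
```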
@@ -594,7 +564,6 @@ def read_qtop_sector(target=0, **kwargs):
dtr_cnfg = 1
qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
version=version, **kwargs)
# unpack to original values, project onto target sector
names = qtop.names
print(names)
print(qtop.deltas.keys())
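The hunk ends before the projection itself, so the following is only a sketch of what "project onto target sector" commonly means here: an indicator that is 1 where the rounded charge equals the target sector and 0 elsewhere (charge values invented):

```python
import numpy as np

# Invented per-configuration charges; in the function they are recovered by
# unpacking qtop.deltas and adding back the mean ("unpack to original values").
Q = np.array([0., 1., -1., 0., 0., 1.])
target = 0

# Indicator of the target sector.
proj = (Q == target).astype(float)
print(proj)  # -> [1. 0. 0. 1. 1. 0.]
```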


@@ -43,15 +43,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
replaces the name of the ensemble
version: str
version of SFCF, with which the measurement was done.
if the compact output option (-c) was spectified,
if the compact output option (-c) was specified,
append a "c" to the version (e.g. "1.0c")
if the append output option (-a) was specified,
append an "a" to the version
append an "a" to the version. Currently supported versions
are "0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a" and "2.0a".
replica: list
list of replica to be read, default is all
files: list
list of files to be read per replica, default is all.
for non-conpact ouztput format, hand the folders to be read here.
for non-compact output format, hand the folders to be read here.
check_configs:
list of list of supposed configs, e.g. [range(1,1000)]
for one replicum with 1000 configs
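Based only on the signature and the docstring above, a hypothetical call might be the following; the path, prefix, correlator and quark names are invented, and the two space-separated quark tokens follow the pattern-building code later in this file:

```python
# Hypothetical usage of read_sfcf as defined above; argument values invented.
f_A = read_sfcf('./raw_data', 'runA', 'f_A', quarks='lquark lquark',
                noffset=0, wf=0, version='2.0c',
                check_configs=[range(1, 1000)])
```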
@@ -77,17 +78,12 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if "replica" in kwargs:
reps = kwargs.get("replica")
# due to higher usage in current projects,
# compact file format is default
compact = True
appended = False
# get version string
known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
if version not in known_versions:
raise Exception("This version is not known!")
# if the letter c is appended to the version,
# the compact fileformat is used (former read_sfcf_c)
if(version[-1] == "c"):
appended = False
compact = True
@@ -119,7 +115,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if not appended:
@@ -135,8 +130,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
# Adjust replica names to new bookmarking system
new_names = []
if not appended:
for entry in ls:
@@ -157,7 +150,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
for entry in ls:
myentry = entry[:-len(name) - 1]
# print(myentry)
try:
idx = myentry.index('r')
except Exception:
@@ -167,7 +159,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
else:
new_names.append(myentry[:idx] + '|' + myentry[idx:])
# print(new_names)
idl = []
if not appended:
for i, item in enumerate(ls):
@@ -183,7 +174,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
sub_ls.extend(dirnames)
break
# print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
@@ -194,7 +184,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
# print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
@@ -206,26 +195,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
except Exception:
raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
rep_idl.sort()
# maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
# here we have found all the files we need to look into.
if i == 0:
# here, we want to find the place within the file,
# where the correlator we need is stored.
if compact:
# to do so, the pattern needed is put together
# from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
# and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
# the start and end point of the correlator
# in quaetion is extracted for later use in
# in question is extracted for later use in
# the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
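The compact-format branch above locates the correlator block by searching for a multi-line key and converting the match offset into a line number by counting newlines before it. A self-contained sketch of both steps (file content invented):

```python
import re

# Toy compact-format content; correlator name, quark content and values
# are invented.
content = ("name f_A\nquarks lquark lquark\noffset 0\nwf 0\n"
           "1 0.11\n2 0.12\n\n")
pattern = 'name f_A\nquarks lquark lquark\noffset 0\nwf 0'

match = re.search(pattern, content)
if match:
    # Line number of the match = number of newlines before it, the same
    # trick as content.count('\n', 0, match.start()) above.
    start_line = content.count('\n', 0, match.start())
    print('header found at line', start_line)  # -> 0
    # A blank line terminates the block, as in the end_match search above.
    end_match = re.search(r'\n\s*\n', content[match.start():])
    print('block spans', end_match.start(), 'characters')
```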
@@ -248,7 +230,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
pattern += ", wf_2 " + str(wf2)
qs = quarks.split(" ")
pattern += " : " + qs[0] + " - " + qs[1]
# print(pattern)
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
@@ -265,30 +246,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
start = k + 7 + b2b
T -= b2b
print(str(T) + " entries found.")
# we found where the correlator
# that is to be read is in the files
# after preparing the datastructure
# the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(no_cfg))
# ...the actual parsing can start.
# we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
# check, if the correlator is in fact
# printed completely
if(start_read + T > len(lines)):
raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
# and start to read the correlator.
# the range here is chosen like this,
# since this allows for implementing
# a security check for every read correlator later...
for k in range(start_read - 6, start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
@@ -300,10 +270,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
# since the non-compactified files
# are typically not so long,
# we can iterate over the whole file.
# here one can also implement the check from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
@@ -320,7 +286,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
# print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
@@ -335,7 +300,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
if len(set([data_starts[i] - data_starts[i - 1] for i in
range(1, len(data_starts))])) > 1:
raise Exception("Irregularities in file structure found, not all runs have the same output length")
# first chunk of data
chunk = content[:data_starts[1]]
for linenumber, line in enumerate(chunk):
if line.startswith("gauge_name"):
@@ -354,12 +318,10 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(len(data_starts)))
# all other chunks should follow the same structure
for cnfg in range(len(data_starts)):
start = data_starts[cnfg]
stop = start + data_starts[1]
chunk = content[start:stop]
# meta_data = {}
try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except Exception:
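The appended-format tail above splits the file into equal-sized per-configuration chunks at data_starts and parses the configuration number from the gauge_name line via split("n")[-1]. A self-contained sketch of that chunking (all content invented):

```python
# Toy stand-in for the appended-format lines parsed above.
content = [
    "[run]", "gauge_name runA_n1", "0 0.11",   # first configuration
    "[run]", "gauge_name runA_n2", "0 0.12",   # second configuration
]

# One chunk per configuration, delimited by the repeating header line.
data_starts = [i for i, line in enumerate(content) if line.startswith("[run]")]

# Offset of the gauge_name line inside a chunk, found in the first chunk
# just as in the code above.
gauge_line = next(i for i, line in enumerate(content[:data_starts[1]])
                  if line.startswith("gauge_name"))

rep_idl = []
for cnfg in range(len(data_starts)):
    start = data_starts[cnfg]
    chunk = content[start:start + data_starts[1]]
    # The configuration number is whatever follows the last 'n',
    # mirroring chunk[gauge_line].split("n")[-1] above.
    rep_idl.append(int(chunk[gauge_line].split("n")[-1]))

print(rep_idl)  # -> [1, 2]
```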