Merge pull request #32 from jkuhl-uni/feature/input_2.0

Feature/input 2.0
This commit is contained in:
Fabian Joswig 2022-01-16 16:43:55 +01:00 committed by GitHub
commit c76e8b8bad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 777 additions and 270 deletions

View file

@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
if not ls:
raise Exception('Error, directory not found')
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if 'files' in kwargs:
ls = kwargs.get('files')
else:
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
if 'r_start' in kwargs:
@ -64,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
else:
r_stop = [None] * replica
print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')
print('Read reweighting factors from', prefix[:-1], ',',
replica, 'replica', end='')
# Adjust replica names to new bookmarking system
if names is None:
@ -94,7 +97,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for k in range(nrw):
deltas.append([])
else:
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): # little weird if-clause due to the /2 operation needed.
# little weird if-clause due to the /2 operation needed.
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
raise Exception('Error: different number of reweighting factors for replicum', rep)
for k in range(nrw):
@ -106,7 +110,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for i in range(nrw):
t = fp.read(4)
nfct.append(struct.unpack('i', t)[0])
# print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
# print('nfct: ', nfct) # Hasenbusch factor,
# 1 for rat reweighting
else:
for i in range(nrw):
nfct.append(1)
@ -135,8 +140,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
for j in range(tmpd['n'][0]):
tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j])))
if print_err:
print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j]))))
print('Sources:', np.exp(-np.asarray(tmp_rw[j])))
print(config_no, i, j,
np.mean(np.exp(-np.asarray(tmp_rw[j]))),
np.std(np.exp(-np.asarray(tmp_rw[j]))))
print('Sources:',
np.exp(-np.asarray(tmp_rw[j])))
print('Partial factor:', tmp_nfct)
elif version == '1.6' or version == '1.4':
tmp_nfct = 1.0
@ -146,7 +154,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
tmp_rw = struct.unpack('d' * nsrc[i], t)
tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
if print_err:
print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
print(config_no, i, j,
np.mean(np.exp(-np.asarray(tmp_rw))),
np.std(np.exp(-np.asarray(tmp_rw))))
print('Sources:', np.exp(-np.asarray(tmp_rw)))
print('Partial factor:', tmp_nfct)
tmp_array[i].append(tmp_nfct)
@ -165,11 +175,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
return result
def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
def extract_t0(path, prefix, dtr_read, xmin,
spatial_extent, fit_range=5, **kwargs):
"""Extract t0 from given .ms.dat files. Returns t0 as Obs.
It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
It is assumed that all boundary effects have
sufficiently decayed at x0=xmin.
The data around the zero crossing of t^2<E> - 0.3
is fitted with a linear function
from which the exact root is extracted.
Only works with openQCD v 1.2.
@ -180,14 +193,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
prefix : str
Ensemble prefix
dtr_read : int
Determines how many trajectories should be skipped when reading the ms.dat files.
Determines how many trajectories should be skipped
when reading the ms.dat files.
Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
xmin : int
First timeslice where the boundary effects have sufficiently decayed.
First timeslice where the boundary
effects have sufficiently decayed.
spatial_extent : int
spatial extent of the lattice, required for normalization.
fit_range : int
Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
Number of data points left and right of the zero
crossing to be included in the linear fit. (Default: 5)
r_start : list
list which contains the first config to be read for each replicum.
r_stop: list
@ -273,7 +289,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
Ysum.append([])
for i, item in enumerate(Ysl):
Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])
Ysum[-1].append([np.mean(item[current + xmin:
current + tmax - xmin])
for current in range(0, len(item), tmax)])
t2E_dict = {}
for n in range(nn + 1):
@ -286,10 +304,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3
zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0)
zero_crossing = np.argmax(np.array(
[o.value for o in t2E_dict.values()]) > 0.0)
x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range]
y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range]
x = list(t2E_dict.keys())[zero_crossing - fit_range:
zero_crossing + fit_range]
y = list(t2E_dict.values())[zero_crossing - fit_range:
zero_crossing + fit_range]
[o.gamma_method() for o in y]
fit_result = fit_lin(x, y)
@ -343,3 +364,243 @@ def _read_array_openQCD2(fp):
arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True)
return {'d': d, 'n': n, 'size': size, 'arr': arr}
def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
"""Read qtop format from given folder structure.
Parameters
----------
path:
path of the measurement files
prefix:
prefix of the measurement files, e.g. <prefix>_id0_r0.ms.dat
c: double
Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L
dtr_cnfg: int
(optional) parameter that specifies the number of trajectories
between two configs.
if it is not set, the distance between two measurements
in the file is assumed to be
the distance between two configurations.
steps: int
(optional) (maybe only necessary for openQCD2.0)
nt step size, guessed if not given
version: str
version string of the openQCD (sfqcd) version used to create
the ensemble
L: int
spatial length of the lattice in L/a.
HAS to be set if version != sfqcd, since openQCD does not provide
this in the header
r_start: list
offset of the first ensemble, making it easier to match
later on with other Obs
r_stop: list
last configurations that need to be read (per replicum)
files: list
specify the exact files that need to be read
from path, pratical if e.g. only one replicum is needed
names: list
Alternative labeling for replicas/ensembles.
Has to have the appropriate length
"""
# one could read L from the header in case of sfQCD
# c = 0.35
known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
if version not in known_versions:
raise Exception("Unknown openQCD version.")
if "steps" in kwargs:
steps = kwargs.get("steps")
if version == "sfqcd":
if "L" in kwargs:
supposed_L = kwargs.get("L")
else:
if "L" not in kwargs:
raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.")
else:
L = kwargs.get("L")
r_start = 1
if "r_start" in kwargs:
r_start = kwargs.get("r_start")
if "r_stop" in kwargs:
r_stop = kwargs.get("r_stop")
# if one wants to read specific files with this method...
if "files" in kwargs:
files = kwargs.get("files")
else:
# find files in path
found = []
files = []
for (dirpath, dirnames, filenames) in os.walk(path + "/"):
# print(filenames)
found.extend(filenames)
break
for f in found:
if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
files.append(f)
print(files)
# now that we found our files, we dechiffer them...
rep_names = []
deltas = []
idl = []
for rep, file in enumerate(files):
with open(path + "/" + file, "rb") as fp:
# header
t = fp.read(12)
header = struct.unpack('<iii', t)
# step size in integration steps "dnms"
dn = header[0]
# number of measurements, so "ntot"/dn
nn = header[1]
# lattice T/a
tmax = header[2]
if version == "sfqcd":
t = fp.read(12)
Ls = struct.unpack('<iii', t)
if(Ls[0] == Ls[1] and Ls[1] == Ls[2]):
L = Ls[0]
if not (supposed_L == L):
raise Exception("It seems the length given in the header and by you contradict each other")
else:
raise Exception("Found more than one spatial length in header!")
print('dnms:', dn)
print('nn:', nn)
print('tmax:', tmax)
t = fp.read(8)
eps = struct.unpack('d', t)[0]
print('eps:', eps)
Q = []
ncs = []
while 0 < 1:
# int nt
t = fp.read(4)
if(len(t) < 4):
break
ncs.append(struct.unpack('i', t)[0])
# Wsl
t = fp.read(8 * tmax * (nn + 1))
# Ysl
t = fp.read(8 * tmax * (nn + 1))
# Qsl, which is asked for in this method
t = fp.read(8 * tmax * (nn + 1))
# unpack the array of Qtops,
# on each timeslice t=0,...,tmax-1 and the
# measurement number in = 0...nn (see README.qcd1)
tmpd = struct.unpack('d' * tmax * (nn + 1), t)
Q.append(tmpd)
if not len(set([ncs[i] - ncs[i - 1] for i in range(1, len(ncs))])):
raise Exception("Irregularities in stepsize found")
else:
if 'steps' in kwargs:
if steps != ncs[1] - ncs[0]:
raise Exception("steps and the found stepsize are not the same")
else:
steps = ncs[1] - ncs[0]
print(len(Q))
print('max_t:', dn * (nn) * eps)
t_aim = (c * L) ** 2 / 8
print('t_aim:', t_aim)
index_aim = round(t_aim / eps / dn)
print('index_aim:', index_aim)
Q_sum = []
for i, item in enumerate(Q):
Q_sum.append([sum(item[current:current + tmax])
for current in range(0, len(item), tmax)])
print(len(Q_sum))
print(len(Q_sum[0]))
Q_round = []
for i in range(len(Q) // dtr_cnfg):
Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim]))
if len(Q_round) != len(ncs) // dtr_cnfg:
raise Exception("qtops and ncs dont have the same length")
# replica = len(files)
truncated_file = file[:-7]
print(truncated_file)
idl_start = 1
if "r_start" in kwargs:
Q_round = Q_round[r_start[rep]:]
idl_start = r_start[rep]
if "r_stop" in kwargs:
Q_round = Q_round[:r_stop[rep]]
idl_stop = idl_start + len(Q_round)
# keyword "names" prevails over "ens_name"
if "names" not in kwargs:
try:
idx = truncated_file.index('r')
except Exception:
if "names" not in kwargs:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
if "ens_name" in kwargs:
ens_name = kwargs.get("ens_name")
else:
ens_name = truncated_file[:idx]
rep_names.append(ens_name + '|' + truncated_file[idx:])
else:
names = kwargs.get("names")
rep_names = names
deltas.append(np.array(Q_round))
idl.append(range(idl_start, idl_stop))
# print(idl)
result = Obs(deltas, rep_names, idl=idl)
return result
def read_qtop_sector(target=0, **kwargs):
"""target: int
specifies the topological sector to be reweighted to (default 0)
q_top: Obs
alternatively takes args of read_qtop method as kwargs
"""
if "q_top" in kwargs:
qtop = kwargs.get("q_top")
else:
if "path" in kwargs:
path = kwargs.get("path")
del kwargs["path"]
else:
raise Exception("If you are not providing q_top, please provide path")
if "prefix" in kwargs:
prefix = kwargs.get("prefix")
del kwargs["prefix"]
else:
raise Exception("If you are not providing q_top, please provide prefix")
if "c" in kwargs:
c = kwargs.get("c")
del kwargs["c"]
else:
raise Exception("If you are not providing q_top, please provide c")
if "version" in kwargs:
version = kwargs.get("version")
del kwargs["version"]
else:
version = "1.2"
if "dtr_cnfg" in kwargs:
dtr_cnfg = kwargs.get("dtr_cnfg")
del kwargs["dtr_cnfg"]
else:
dtr_cnfg = 1
qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
version=version, **kwargs)
# unpack to original values, project onto target sector
names = qtop.names
print(names)
print(qtop.deltas.keys())
proj_qtop = []
for n in qtop.deltas:
proj_qtop.append(np.array([1 if int(qtop.value + q) == target else 0 for q in qtop.deltas[n]]))
result = Obs(proj_qtop, qtop.names)
return result

View file

@ -6,17 +6,57 @@ import fnmatch
import re
import numpy as np # Thinly-wrapped numpy
from ..obs import Obs
from . import utils
def read_sfcf(path, prefix, name, **kwargs):
"""Read sfcf C format from given folder structure.
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
version="1.0c", **kwargs):
"""Read sfcf c format from given folder structure.
Parameters
----------
im -- if True, read imaginary instead of real part of the correlation function.
single -- if True, read a boundary-to-boundary correlation function with a single value
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
quarks: str
Label of the quarks used in the sfcf input file. e.g. "quark quark"
for version 0.0 this does NOT need to be given with the typical " - "
that is present in the output file,
this is done automatically for this version
noffset: int
Offset of the source (only relevant when wavefunctions are used)
wf: int
ID of wave function
wf2: int
ID of the second wavefunction
(only relevant for boundary-to-boundary correlation functions)
im: bool
if True, read imaginary instead of real part
of the correlation function.
b2b: bool
if True, read a time-dependent boundary-to-boundary
correlation function
single: bool
if True, read time independent boundary to boundary
correlation function
names: list
Alternative labeling for replicas/ensembles.
Has to have the appropriate length
ens_name : str
replaces the name of the ensemble
version: str
version of SFCF, with which the measurement was done.
if the compact output option (-c) was spectified,
append a "c" to the version (e.g. "1.0c")
if the append output option (-a) was specified,
append an "a" to the version
replica: list
list of replica to be read, default is all
files: list
list of files to be read per replica, default is all.
for non-conpact ouztput format, hand the folders to be read here.
check_configs:
list of list of supposed configs, eg. [range(1,1000)]
for one replicum with 1000 configs
TODO:
- whats going on with files here?
"""
if kwargs.get('im'):
im = 1
@ -29,266 +69,321 @@ def read_sfcf(path, prefix, name, **kwargs):
b2b = 1
single = 1
else:
b2b = 0
if kwargs.get('b2b'):
b2b = 1
else:
b2b = 0
single = 0
if "replica" in kwargs:
reps = kwargs.get("replica")
if kwargs.get('b2b'):
b2b = 1
# due to higher usage in current projects,
# compact file format is default
compact = True
appended = False
# get version string
known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
if version not in known_versions:
raise Exception("This version is not known!")
# if the letter c is appended to the version,
# the compact fileformat is used (former read_sfcf_c)
if(version[-1] == "c"):
appended = False
compact = True
version = version[:-1]
elif(version[-1] == "a"):
appended = True
compact = False
version = version[:-1]
else:
compact = False
appended = False
read = 0
T = 0
start = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(dirnames)
break
if not ls:
raise Exception('Error, directory not found')
for exc in ls:
if fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set(exc))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
if "replica" in kwargs:
ls = reps
else:
# Adjust replica names to new bookmarking system
new_names = []
for entry in ls:
idx = entry.index('r')
new_names.append(entry[:idx] + '|' + entry[idx:])
print(replica, 'replica')
for i, item in enumerate(ls):
print(item)
sub_ls = []
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
sub_ls.extend(dirnames)
break
for exc in sub_ls:
if fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set(exc))
sub_ls.sort(key=lambda x: int(x[3:]))
no_cfg = len(sub_ls)
print(no_cfg, 'configurations')
if i == 0:
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if '[correlator]' in line:
read = 1
start = k + 7 + b2b
T -= b2b
deltas = []
for j in range(T):
deltas.append([])
sublength = len(sub_ls)
for j in range(T):
deltas[j].append(np.zeros(sublength))
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
deltas[k - start][i][cnfg] = floats[1 + im - single]
result = []
for t in range(T):
result.append(Obs(deltas[t], new_names))
return result
def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
"""Read sfcf c format from given folder structure.
Parameters
----------
quarks -- Label of the quarks used in the sfcf input file
noffset -- Offset of the source (only relevant when wavefunctions are used)
wf -- ID of wave function
wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
im -- if True, read imaginary instead of real part of the correlation function.
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
ens_name : str
replaces the name of the ensemble
"""
if kwargs.get('im'):
im = 1
part = 'imaginary'
else:
im = 0
part = 'real'
if kwargs.get('b2b'):
b2b = 1
else:
b2b = 0
T = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(dirnames)
break
if not ls:
raise Exception('Error, directory not found')
# Exclude folders with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
replica = len(ls)
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
# Adjust replica names to new bookmarking system
new_names = []
for entry in ls:
idx = entry.index('r')
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
for (dirpath, dirnames, filenames) in os.walk(path):
if not appended:
ls.extend(dirnames)
else:
new_names.append(entry[:idx] + '|' + entry[idx:])
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
for i, item in enumerate(ls):
sub_ls = []
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
sub_ls.extend(filenames)
ls.extend(filenames)
break
for exc in sub_ls:
if not ls:
raise Exception('Error, directory not found')
# Exclude folders with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
if not appended:
replica = len(ls)
else:
replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
print('Read', part, 'part of', name, 'from', prefix[:-1],
',', replica, 'replica')
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names) != len(set(new_names)):
raise Exception("names are not unique!")
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
# Adjust replica names to new bookmarking system
last_cfg = len(sub_ls) + first_cfg - 1
new_names = []
if not appended:
for entry in ls:
try:
idx = entry.index('r')
except Exception:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
for cfg in range(1, len(sub_ls)):
if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
last_cfg = cfg + first_cfg - 1
break
no_cfg = last_cfg - first_cfg + 1
print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
if i == 0:
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
else:
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
new_names.append(entry[:idx] + '|' + entry[idx:])
else:
deltas = []
for j in range(T):
deltas.append([])
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
for entry in ls:
myentry = entry[:-len(name) - 1]
# print(myentry)
try:
idx = myentry.index('r')
except Exception:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
sublength = no_cfg
for j in range(T):
deltas[j].append(np.zeros(sublength))
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
else:
new_names.append(myentry[:idx] + '|' + myentry[idx:])
# print(new_names)
idl = []
if not appended:
for i, item in enumerate(ls):
sub_ls = []
if "files" in kwargs:
sub_ls = kwargs.get("files")
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
else:
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
if compact:
sub_ls.extend(filenames)
else:
sub_ls.extend(dirnames)
break
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
for k, line in enumerate(fp):
if k == start_read - 5 - b2b:
if line.strip() != 'name ' + name:
raise Exception('Wrong format', sub_ls[cfg])
if(k >= start_read and k < start_read + T):
floats = list(map(float, line.split()))
deltas[k - start_read][i][cfg] = floats[-2:][im]
# print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x:
int(re.findall(r'\d+', x)[-1]))
else:
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
# print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
try:
if compact:
rep_idl.append(int(cfg.split("n")[-1]))
else:
rep_idl.append(int(cfg[3:]))
except Exception:
raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
rep_idl.sort()
# maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
# here we have found all the files we need to look into.
if i == 0:
# here, we want to find the place within the file,
# where the correlator we need is stored.
if compact:
# to do so, the pattern needed is put together
# from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
# and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
content = file.read()
match = re.search(pattern, content)
if match:
# the start and end point of the correlator
# in quaetion is extracted for later use in
# the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
else:
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
else:
# this part does the same as above,
# but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if version == "0.0":
# check if this is really the right file
# by matching pattern similar to above
pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
# if b2b, a second wf is needed
if b2b:
pattern += ", wf_2 " + str(wf2)
qs = quarks.split(" ")
pattern += " : " + qs[0] + " - " + qs[1]
# print(pattern)
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if version == "0.0":
if pattern in line:
# print(line)
read = 1
start = k + 1
else:
if '[correlator]' in line:
read = 1
start = k + 7 + b2b
T -= b2b
print(str(T) + " entries found.")
# we found where the correlator
# that is to be read is in the files
# after preparing the datastructure
# the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(no_cfg))
# ...the actual parsing can start.
# we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
# check, if the correlator is in fact
# printed completely
if(start_read + T > len(lines)):
raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
# and start to read the correlator.
# the range here is chosen like this,
# since this allows for implementing
# a security check for every read correlator later...
for k in range(start_read - 6, start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
raise Exception('Wrong format',
sub_ls[cfg])
if(k >= start_read and k < start_read + T):
floats = list(map(float, lines[k].split()))
deltas[k - start_read][i][cfg] = floats[-2:][im]
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
# since the non-compatified files
# are typically not so long,
# we can iterate over the whole file.
# here one can also implement the chekc from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
if version == "0.0":
deltas[k - start][i][cnfg] = floats[im]
else:
deltas[k - start][i][cnfg] = floats[1 + im - single]
else:
if "files" in kwargs:
ls = kwargs.get("files")
else:
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
# print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
for rep, file in enumerate(ls):
rep_idl = []
with open(path + '/' + file, 'r') as fp:
content = fp.readlines()
data_starts = []
for linenumber, line in enumerate(content):
if "[run]" in line:
data_starts.append(linenumber)
if len(set([data_starts[i] - data_starts[i - 1] for i in
range(1, len(data_starts))])) > 1:
raise Exception("Irregularities in file structure found, not all runs have the same output length")
# first chunk of data
chunk = content[:data_starts[1]]
for linenumber, line in enumerate(chunk):
if line.startswith("gauge_name"):
gauge_line = linenumber
elif line.startswith("[correlator]"):
corr_line = linenumber
found_pat = ""
for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
found_pat += li
if re.search(pattern, found_pat):
start_read = corr_line + 7 + b2b
T = len(chunk) - 1 - start_read
if rep == 0:
deltas = []
for t in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(len(data_starts)))
# all other chunks should follow the same structure
for cnfg in range(len(data_starts)):
start = data_starts[cnfg]
stop = start + data_starts[1]
chunk = content[start:stop]
# meta_data = {}
try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except Exception:
raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line)
found_pat = ""
for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
found_pat += li
if re.search(pattern, found_pat):
for t, line in enumerate(chunk[start_read:start_read + T]):
floats = list(map(float, line.split()))
deltas[t][rep][cnfg] = floats[-2:][im]
idl.append(rep_idl)
if "check_configs" in kwargs:
print("Checking for missing configs...")
che = kwargs.get("check_configs")
if not (len(che) == len(idl)):
raise Exception("check_configs has to be the same length as replica!")
for r in range(len(idl)):
print("checking " + new_names[r])
utils.check_idl(idl[r], che[r])
print("Done")
result = []
for t in range(T):
result.append(Obs(deltas[t], new_names))
return result
def read_qtop(path, prefix, **kwargs):
"""Read qtop format from given folder structure.
Parameters
----------
target -- specifies the topological sector to be reweighted to (default 0)
full -- if true read the charge instead of the reweighting factor.
"""
if 'target' in kwargs:
target = kwargs.get('target')
else:
target = 0
if kwargs.get('full'):
full = 1
else:
full = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
raise Exception('Error, directory not found')
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
replica = len(ls)
print('Read Q_top from', prefix[:-1], ',', replica, 'replica')
deltas = []
for rep in range(replica):
tmp = []
with open(path + '/' + ls[rep]) as fp:
for k, line in enumerate(fp):
floats = list(map(float, line.split()))
if full == 1:
tmp.append(floats[1])
else:
if int(floats[1]) == target:
tmp.append(1.0)
else:
tmp.append(0.0)
deltas.append(np.array(tmp))
rep_names = []
for entry in ls:
truncated_entry = entry.split('.')[0]
idx = truncated_entry.index('r')
rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
result = Obs(deltas, rep_names)
result.append(Obs(deltas[t], new_names, idl=idl))
return result

15
pyerrors/input/utils.py Normal file
View file

@ -0,0 +1,15 @@
"""Utilities for the input"""
def check_idl(idl, che):
missing = []
for c in che:
if c not in idl:
missing.append(c)
# print missing such that it can directly be parsed to slurm terminal
if not (len(missing) == 0):
print(len(missing), "configs missing")
miss_str = str(missing[0])
for i in missing[1:]:
miss_str += "," + str(i)
print(miss_str)

136
tests/input_test.ipynb Normal file
View file

@ -0,0 +1,136 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This file is used for testing some of the input methods."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os,sys,inspect\n",
"current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n",
"parent_dir = os.path.dirname(current_dir)\n",
"sys.path.insert(0, parent_dir) \n",
"\n",
"import pyerrors as pe\n",
"import pyerrors.input.openQCD as qcdin\n",
"import pyerrors.input.sfcf as sfin\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, we will have a look at the input method for the topological charge $Q_{top}$, which is measured by the program ms from the openQCD package. For now, this part still in the making and depends on an actual file. Later, this should be changed to a more efficient way of making a proper input file.\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['T29L20k0.13719r2.ms.dat', 'T29L20k0.13719r3.ms.dat', 'T29L20k0.13719r1.ms.dat', 'T29L20k0.13719r4.ms.dat']\n",
"dn: 10\n",
"nn: 60\n",
"tmax: 30\n",
"eps: 0.02\n",
"max_t: 12.0\n",
"t_aim: 6.125\n",
"index_aim: 31\n",
"T29L20k0.13719r2\n",
"dn: 10\n",
"nn: 60\n",
"tmax: 30\n",
"eps: 0.02\n",
"max_t: 12.0\n",
"t_aim: 6.125\n",
"index_aim: 31\n",
"T29L20k0.13719r3\n",
"dn: 10\n",
"nn: 60\n",
"tmax: 30\n",
"eps: 0.02\n",
"max_t: 12.0\n",
"t_aim: 6.125\n",
"index_aim: 31\n",
"T29L20k0.13719r1\n",
"dn: 10\n",
"nn: 60\n",
"tmax: 30\n",
"eps: 0.02\n",
"max_t: 12.0\n",
"t_aim: 6.125\n",
"index_aim: 31\n",
"T29L20k0.13719r4\n"
]
}
],
"source": [
"r_qtop = qcdin.read_qtop(\"../../test_data\", prefix = \"T29L20k0.13719\",full = True, r_stop = [500,440,447,410])#, files = [\"T29L20k0.13719r1.ms.dat\"], )"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'T29L20k0.13719|r1': 500, 'T29L20k0.13719|r2': 440, 'T29L20k0.13719|r3': 447, 'T29L20k0.13719|r4': 410}\n",
"0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -2 -2 -2 -2 -3 -3 -3 -3 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 "
]
}
],
"source": [
"print(r_qtop.shape)\n",
"#print(r_qtop.deltas['T29L20k0.13719|r1'])\n",
"for i in r_qtop.deltas['T29L20k0.13719|r2']:\n",
" print(round(r_qtop.value + i), end =\" \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
},
"kernelspec": {
"display_name": "Python 3.9.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}