mirror of
https://github.com/fjosw/pyerrors.git
synced 2025-05-15 20:13:41 +02:00
Merge pull request #32 from jkuhl-uni/feature/input_2.0
Feature/input 2.0
This commit is contained in:
commit
c76e8b8bad
4 changed files with 777 additions and 270 deletions
|
@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
|
||||
if not ls:
|
||||
raise Exception('Error, directory not found')
|
||||
|
||||
# Exclude files with different names
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
if len(ls) > 1:
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
|
||||
if 'files' in kwargs:
|
||||
ls = kwargs.get('files')
|
||||
else:
|
||||
# Exclude files with different names
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
if len(ls) > 1:
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
|
||||
replica = len(ls)
|
||||
|
||||
if 'r_start' in kwargs:
|
||||
|
@ -64,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
else:
|
||||
r_stop = [None] * replica
|
||||
|
||||
print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')
|
||||
print('Read reweighting factors from', prefix[:-1], ',',
|
||||
replica, 'replica', end='')
|
||||
|
||||
# Adjust replica names to new bookmarking system
|
||||
if names is None:
|
||||
|
@ -94,7 +97,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
for k in range(nrw):
|
||||
deltas.append([])
|
||||
else:
|
||||
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): # little weird if-clause due to the /2 operation needed.
|
||||
# little weird if-clause due to the /2 operation needed.
|
||||
if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
|
||||
raise Exception('Error: different number of reweighting factors for replicum', rep)
|
||||
|
||||
for k in range(nrw):
|
||||
|
@ -106,7 +110,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
for i in range(nrw):
|
||||
t = fp.read(4)
|
||||
nfct.append(struct.unpack('i', t)[0])
|
||||
# print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
|
||||
# print('nfct: ', nfct) # Hasenbusch factor,
|
||||
# 1 for rat reweighting
|
||||
else:
|
||||
for i in range(nrw):
|
||||
nfct.append(1)
|
||||
|
@ -135,8 +140,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
for j in range(tmpd['n'][0]):
|
||||
tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j])))
|
||||
if print_err:
|
||||
print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j]))))
|
||||
print('Sources:', np.exp(-np.asarray(tmp_rw[j])))
|
||||
print(config_no, i, j,
|
||||
np.mean(np.exp(-np.asarray(tmp_rw[j]))),
|
||||
np.std(np.exp(-np.asarray(tmp_rw[j]))))
|
||||
print('Sources:',
|
||||
np.exp(-np.asarray(tmp_rw[j])))
|
||||
print('Partial factor:', tmp_nfct)
|
||||
elif version == '1.6' or version == '1.4':
|
||||
tmp_nfct = 1.0
|
||||
|
@ -146,7 +154,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
tmp_rw = struct.unpack('d' * nsrc[i], t)
|
||||
tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
|
||||
if print_err:
|
||||
print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
|
||||
print(config_no, i, j,
|
||||
np.mean(np.exp(-np.asarray(tmp_rw))),
|
||||
np.std(np.exp(-np.asarray(tmp_rw))))
|
||||
print('Sources:', np.exp(-np.asarray(tmp_rw)))
|
||||
print('Partial factor:', tmp_nfct)
|
||||
tmp_array[i].append(tmp_nfct)
|
||||
|
@ -165,11 +175,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
|
|||
return result
|
||||
|
||||
|
||||
def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
|
||||
def extract_t0(path, prefix, dtr_read, xmin,
|
||||
spatial_extent, fit_range=5, **kwargs):
|
||||
"""Extract t0 from given .ms.dat files. Returns t0 as Obs.
|
||||
|
||||
It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
|
||||
The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
|
||||
It is assumed that all boundary effects have
|
||||
sufficiently decayed at x0=xmin.
|
||||
The data around the zero crossing of t^2<E> - 0.3
|
||||
is fitted with a linear function
|
||||
from which the exact root is extracted.
|
||||
Only works with openQCD v 1.2.
|
||||
|
||||
|
@ -180,14 +193,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
|
|||
prefix : str
|
||||
Ensemble prefix
|
||||
dtr_read : int
|
||||
Determines how many trajectories should be skipped when reading the ms.dat files.
|
||||
Determines how many trajectories should be skipped
|
||||
when reading the ms.dat files.
|
||||
Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
|
||||
xmin : int
|
||||
First timeslice where the boundary effects have sufficiently decayed.
|
||||
First timeslice where the boundary
|
||||
effects have sufficiently decayed.
|
||||
spatial_extent : int
|
||||
spatial extent of the lattice, required for normalization.
|
||||
fit_range : int
|
||||
Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
|
||||
Number of data points left and right of the zero
|
||||
crossing to be included in the linear fit. (Default: 5)
|
||||
r_start : list
|
||||
list which contains the first config to be read for each replicum.
|
||||
r_stop: list
|
||||
|
@ -273,7 +289,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
|
|||
|
||||
Ysum.append([])
|
||||
for i, item in enumerate(Ysl):
|
||||
Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])
|
||||
Ysum[-1].append([np.mean(item[current + xmin:
|
||||
current + tmax - xmin])
|
||||
for current in range(0, len(item), tmax)])
|
||||
|
||||
t2E_dict = {}
|
||||
for n in range(nn + 1):
|
||||
|
@ -286,10 +304,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
|
|||
new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
|
||||
t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3
|
||||
|
||||
zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0)
|
||||
zero_crossing = np.argmax(np.array(
|
||||
[o.value for o in t2E_dict.values()]) > 0.0)
|
||||
|
||||
x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range]
|
||||
y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range]
|
||||
x = list(t2E_dict.keys())[zero_crossing - fit_range:
|
||||
zero_crossing + fit_range]
|
||||
y = list(t2E_dict.values())[zero_crossing - fit_range:
|
||||
zero_crossing + fit_range]
|
||||
[o.gamma_method() for o in y]
|
||||
|
||||
fit_result = fit_lin(x, y)
|
||||
|
@ -343,3 +364,243 @@ def _read_array_openQCD2(fp):
|
|||
|
||||
arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True)
|
||||
return {'d': d, 'n': n, 'size': size, 'arr': arr}
|
||||
|
||||
|
||||
def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
|
||||
"""Read qtop format from given folder structure.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path:
|
||||
path of the measurement files
|
||||
prefix:
|
||||
prefix of the measurement files, e.g. <prefix>_id0_r0.ms.dat
|
||||
c: double
|
||||
Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L
|
||||
dtr_cnfg: int
|
||||
(optional) parameter that specifies the number of trajectories
|
||||
between two configs.
|
||||
if it is not set, the distance between two measurements
|
||||
in the file is assumed to be
|
||||
the distance between two configurations.
|
||||
steps: int
|
||||
(optional) (maybe only necessary for openQCD2.0)
|
||||
nt step size, guessed if not given
|
||||
version: str
|
||||
version string of the openQCD (sfqcd) version used to create
|
||||
the ensemble
|
||||
L: int
|
||||
spatial length of the lattice in L/a.
|
||||
HAS to be set if version != sfqcd, since openQCD does not provide
|
||||
this in the header
|
||||
r_start: list
|
||||
offset of the first ensemble, making it easier to match
|
||||
later on with other Obs
|
||||
r_stop: list
|
||||
last configurations that need to be read (per replicum)
|
||||
files: list
|
||||
specify the exact files that need to be read
|
||||
from path, pratical if e.g. only one replicum is needed
|
||||
names: list
|
||||
Alternative labeling for replicas/ensembles.
|
||||
Has to have the appropriate length
|
||||
"""
|
||||
# one could read L from the header in case of sfQCD
|
||||
# c = 0.35
|
||||
known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
|
||||
|
||||
if version not in known_versions:
|
||||
raise Exception("Unknown openQCD version.")
|
||||
if "steps" in kwargs:
|
||||
steps = kwargs.get("steps")
|
||||
if version == "sfqcd":
|
||||
if "L" in kwargs:
|
||||
supposed_L = kwargs.get("L")
|
||||
else:
|
||||
if "L" not in kwargs:
|
||||
raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.")
|
||||
else:
|
||||
L = kwargs.get("L")
|
||||
r_start = 1
|
||||
if "r_start" in kwargs:
|
||||
r_start = kwargs.get("r_start")
|
||||
if "r_stop" in kwargs:
|
||||
r_stop = kwargs.get("r_stop")
|
||||
# if one wants to read specific files with this method...
|
||||
if "files" in kwargs:
|
||||
files = kwargs.get("files")
|
||||
else:
|
||||
# find files in path
|
||||
found = []
|
||||
files = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path + "/"):
|
||||
# print(filenames)
|
||||
found.extend(filenames)
|
||||
break
|
||||
for f in found:
|
||||
if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
|
||||
files.append(f)
|
||||
print(files)
|
||||
# now that we found our files, we dechiffer them...
|
||||
rep_names = []
|
||||
|
||||
deltas = []
|
||||
idl = []
|
||||
for rep, file in enumerate(files):
|
||||
with open(path + "/" + file, "rb") as fp:
|
||||
# header
|
||||
t = fp.read(12)
|
||||
header = struct.unpack('<iii', t)
|
||||
# step size in integration steps "dnms"
|
||||
dn = header[0]
|
||||
# number of measurements, so "ntot"/dn
|
||||
nn = header[1]
|
||||
# lattice T/a
|
||||
tmax = header[2]
|
||||
if version == "sfqcd":
|
||||
t = fp.read(12)
|
||||
Ls = struct.unpack('<iii', t)
|
||||
if(Ls[0] == Ls[1] and Ls[1] == Ls[2]):
|
||||
L = Ls[0]
|
||||
if not (supposed_L == L):
|
||||
raise Exception("It seems the length given in the header and by you contradict each other")
|
||||
else:
|
||||
raise Exception("Found more than one spatial length in header!")
|
||||
|
||||
print('dnms:', dn)
|
||||
print('nn:', nn)
|
||||
print('tmax:', tmax)
|
||||
t = fp.read(8)
|
||||
eps = struct.unpack('d', t)[0]
|
||||
print('eps:', eps)
|
||||
|
||||
Q = []
|
||||
ncs = []
|
||||
while 0 < 1:
|
||||
# int nt
|
||||
t = fp.read(4)
|
||||
if(len(t) < 4):
|
||||
break
|
||||
ncs.append(struct.unpack('i', t)[0])
|
||||
# Wsl
|
||||
t = fp.read(8 * tmax * (nn + 1))
|
||||
# Ysl
|
||||
t = fp.read(8 * tmax * (nn + 1))
|
||||
# Qsl, which is asked for in this method
|
||||
t = fp.read(8 * tmax * (nn + 1))
|
||||
# unpack the array of Qtops,
|
||||
# on each timeslice t=0,...,tmax-1 and the
|
||||
# measurement number in = 0...nn (see README.qcd1)
|
||||
tmpd = struct.unpack('d' * tmax * (nn + 1), t)
|
||||
Q.append(tmpd)
|
||||
|
||||
if not len(set([ncs[i] - ncs[i - 1] for i in range(1, len(ncs))])):
|
||||
raise Exception("Irregularities in stepsize found")
|
||||
else:
|
||||
if 'steps' in kwargs:
|
||||
if steps != ncs[1] - ncs[0]:
|
||||
raise Exception("steps and the found stepsize are not the same")
|
||||
else:
|
||||
steps = ncs[1] - ncs[0]
|
||||
|
||||
print(len(Q))
|
||||
print('max_t:', dn * (nn) * eps)
|
||||
|
||||
t_aim = (c * L) ** 2 / 8
|
||||
|
||||
print('t_aim:', t_aim)
|
||||
index_aim = round(t_aim / eps / dn)
|
||||
print('index_aim:', index_aim)
|
||||
|
||||
Q_sum = []
|
||||
for i, item in enumerate(Q):
|
||||
Q_sum.append([sum(item[current:current + tmax])
|
||||
for current in range(0, len(item), tmax)])
|
||||
print(len(Q_sum))
|
||||
print(len(Q_sum[0]))
|
||||
Q_round = []
|
||||
for i in range(len(Q) // dtr_cnfg):
|
||||
Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim]))
|
||||
if len(Q_round) != len(ncs) // dtr_cnfg:
|
||||
raise Exception("qtops and ncs dont have the same length")
|
||||
|
||||
# replica = len(files)
|
||||
|
||||
truncated_file = file[:-7]
|
||||
print(truncated_file)
|
||||
idl_start = 1
|
||||
|
||||
if "r_start" in kwargs:
|
||||
Q_round = Q_round[r_start[rep]:]
|
||||
idl_start = r_start[rep]
|
||||
if "r_stop" in kwargs:
|
||||
Q_round = Q_round[:r_stop[rep]]
|
||||
idl_stop = idl_start + len(Q_round)
|
||||
# keyword "names" prevails over "ens_name"
|
||||
if "names" not in kwargs:
|
||||
try:
|
||||
idx = truncated_file.index('r')
|
||||
except Exception:
|
||||
if "names" not in kwargs:
|
||||
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
|
||||
if "ens_name" in kwargs:
|
||||
ens_name = kwargs.get("ens_name")
|
||||
else:
|
||||
ens_name = truncated_file[:idx]
|
||||
rep_names.append(ens_name + '|' + truncated_file[idx:])
|
||||
else:
|
||||
names = kwargs.get("names")
|
||||
rep_names = names
|
||||
deltas.append(np.array(Q_round))
|
||||
idl.append(range(idl_start, idl_stop))
|
||||
# print(idl)
|
||||
result = Obs(deltas, rep_names, idl=idl)
|
||||
return result
|
||||
|
||||
|
||||
def read_qtop_sector(target=0, **kwargs):
|
||||
"""target: int
|
||||
specifies the topological sector to be reweighted to (default 0)
|
||||
q_top: Obs
|
||||
alternatively takes args of read_qtop method as kwargs
|
||||
"""
|
||||
if "q_top" in kwargs:
|
||||
qtop = kwargs.get("q_top")
|
||||
else:
|
||||
if "path" in kwargs:
|
||||
path = kwargs.get("path")
|
||||
del kwargs["path"]
|
||||
else:
|
||||
raise Exception("If you are not providing q_top, please provide path")
|
||||
if "prefix" in kwargs:
|
||||
prefix = kwargs.get("prefix")
|
||||
del kwargs["prefix"]
|
||||
else:
|
||||
raise Exception("If you are not providing q_top, please provide prefix")
|
||||
if "c" in kwargs:
|
||||
c = kwargs.get("c")
|
||||
del kwargs["c"]
|
||||
else:
|
||||
raise Exception("If you are not providing q_top, please provide c")
|
||||
if "version" in kwargs:
|
||||
version = kwargs.get("version")
|
||||
del kwargs["version"]
|
||||
else:
|
||||
version = "1.2"
|
||||
if "dtr_cnfg" in kwargs:
|
||||
dtr_cnfg = kwargs.get("dtr_cnfg")
|
||||
del kwargs["dtr_cnfg"]
|
||||
else:
|
||||
dtr_cnfg = 1
|
||||
qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
|
||||
version=version, **kwargs)
|
||||
# unpack to original values, project onto target sector
|
||||
names = qtop.names
|
||||
print(names)
|
||||
print(qtop.deltas.keys())
|
||||
proj_qtop = []
|
||||
for n in qtop.deltas:
|
||||
proj_qtop.append(np.array([1 if int(qtop.value + q) == target else 0 for q in qtop.deltas[n]]))
|
||||
|
||||
result = Obs(proj_qtop, qtop.names)
|
||||
return result
|
||||
|
|
|
@ -6,17 +6,57 @@ import fnmatch
|
|||
import re
|
||||
import numpy as np # Thinly-wrapped numpy
|
||||
from ..obs import Obs
|
||||
from . import utils
|
||||
|
||||
|
||||
def read_sfcf(path, prefix, name, **kwargs):
|
||||
"""Read sfcf C format from given folder structure.
|
||||
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
|
||||
version="1.0c", **kwargs):
|
||||
"""Read sfcf c format from given folder structure.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
im -- if True, read imaginary instead of real part of the correlation function.
|
||||
single -- if True, read a boundary-to-boundary correlation function with a single value
|
||||
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
|
||||
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
|
||||
quarks: str
|
||||
Label of the quarks used in the sfcf input file. e.g. "quark quark"
|
||||
for version 0.0 this does NOT need to be given with the typical " - "
|
||||
that is present in the output file,
|
||||
this is done automatically for this version
|
||||
noffset: int
|
||||
Offset of the source (only relevant when wavefunctions are used)
|
||||
wf: int
|
||||
ID of wave function
|
||||
wf2: int
|
||||
ID of the second wavefunction
|
||||
(only relevant for boundary-to-boundary correlation functions)
|
||||
im: bool
|
||||
if True, read imaginary instead of real part
|
||||
of the correlation function.
|
||||
b2b: bool
|
||||
if True, read a time-dependent boundary-to-boundary
|
||||
correlation function
|
||||
single: bool
|
||||
if True, read time independent boundary to boundary
|
||||
correlation function
|
||||
names: list
|
||||
Alternative labeling for replicas/ensembles.
|
||||
Has to have the appropriate length
|
||||
ens_name : str
|
||||
replaces the name of the ensemble
|
||||
version: str
|
||||
version of SFCF, with which the measurement was done.
|
||||
if the compact output option (-c) was spectified,
|
||||
append a "c" to the version (e.g. "1.0c")
|
||||
if the append output option (-a) was specified,
|
||||
append an "a" to the version
|
||||
replica: list
|
||||
list of replica to be read, default is all
|
||||
files: list
|
||||
list of files to be read per replica, default is all.
|
||||
for non-conpact ouztput format, hand the folders to be read here.
|
||||
check_configs:
|
||||
list of list of supposed configs, eg. [range(1,1000)]
|
||||
for one replicum with 1000 configs
|
||||
TODO:
|
||||
- whats going on with files here?
|
||||
"""
|
||||
if kwargs.get('im'):
|
||||
im = 1
|
||||
|
@ -29,266 +69,321 @@ def read_sfcf(path, prefix, name, **kwargs):
|
|||
b2b = 1
|
||||
single = 1
|
||||
else:
|
||||
b2b = 0
|
||||
if kwargs.get('b2b'):
|
||||
b2b = 1
|
||||
else:
|
||||
b2b = 0
|
||||
single = 0
|
||||
if "replica" in kwargs:
|
||||
reps = kwargs.get("replica")
|
||||
|
||||
if kwargs.get('b2b'):
|
||||
b2b = 1
|
||||
# due to higher usage in current projects,
|
||||
# compact file format is default
|
||||
compact = True
|
||||
appended = False
|
||||
# get version string
|
||||
known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
|
||||
|
||||
if version not in known_versions:
|
||||
raise Exception("This version is not known!")
|
||||
# if the letter c is appended to the version,
|
||||
# the compact fileformat is used (former read_sfcf_c)
|
||||
if(version[-1] == "c"):
|
||||
appended = False
|
||||
compact = True
|
||||
version = version[:-1]
|
||||
elif(version[-1] == "a"):
|
||||
appended = True
|
||||
compact = False
|
||||
version = version[:-1]
|
||||
else:
|
||||
compact = False
|
||||
appended = False
|
||||
read = 0
|
||||
T = 0
|
||||
start = 0
|
||||
ls = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
||||
ls.extend(dirnames)
|
||||
break
|
||||
if not ls:
|
||||
raise Exception('Error, directory not found')
|
||||
for exc in ls:
|
||||
if fnmatch.fnmatch(exc, prefix + '*'):
|
||||
ls = list(set(ls) - set(exc))
|
||||
if len(ls) > 1:
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
|
||||
replica = len(ls)
|
||||
print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
|
||||
if 'names' in kwargs:
|
||||
new_names = kwargs.get('names')
|
||||
if len(new_names) != replica:
|
||||
raise Exception('Names does not have the required length', replica)
|
||||
if "replica" in kwargs:
|
||||
ls = reps
|
||||
else:
|
||||
# Adjust replica names to new bookmarking system
|
||||
new_names = []
|
||||
for entry in ls:
|
||||
idx = entry.index('r')
|
||||
new_names.append(entry[:idx] + '|' + entry[idx:])
|
||||
|
||||
print(replica, 'replica')
|
||||
for i, item in enumerate(ls):
|
||||
print(item)
|
||||
sub_ls = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
|
||||
sub_ls.extend(dirnames)
|
||||
break
|
||||
for exc in sub_ls:
|
||||
if fnmatch.fnmatch(exc, 'cfg*'):
|
||||
sub_ls = list(set(sub_ls) - set(exc))
|
||||
sub_ls.sort(key=lambda x: int(x[3:]))
|
||||
no_cfg = len(sub_ls)
|
||||
print(no_cfg, 'configurations')
|
||||
|
||||
if i == 0:
|
||||
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
|
||||
for k, line in enumerate(fp):
|
||||
if read == 1 and not line.strip() and k > start + 1:
|
||||
break
|
||||
if read == 1 and k >= start:
|
||||
T += 1
|
||||
if '[correlator]' in line:
|
||||
read = 1
|
||||
start = k + 7 + b2b
|
||||
T -= b2b
|
||||
|
||||
deltas = []
|
||||
for j in range(T):
|
||||
deltas.append([])
|
||||
|
||||
sublength = len(sub_ls)
|
||||
for j in range(T):
|
||||
deltas[j].append(np.zeros(sublength))
|
||||
|
||||
for cnfg, subitem in enumerate(sub_ls):
|
||||
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
|
||||
for k, line in enumerate(fp):
|
||||
if(k >= start and k < start + T):
|
||||
floats = list(map(float, line.split()))
|
||||
deltas[k - start][i][cnfg] = floats[1 + im - single]
|
||||
|
||||
result = []
|
||||
for t in range(T):
|
||||
result.append(Obs(deltas[t], new_names))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
|
||||
"""Read sfcf c format from given folder structure.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
quarks -- Label of the quarks used in the sfcf input file
|
||||
noffset -- Offset of the source (only relevant when wavefunctions are used)
|
||||
wf -- ID of wave function
|
||||
wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
|
||||
im -- if True, read imaginary instead of real part of the correlation function.
|
||||
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
|
||||
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
|
||||
ens_name : str
|
||||
replaces the name of the ensemble
|
||||
"""
|
||||
|
||||
if kwargs.get('im'):
|
||||
im = 1
|
||||
part = 'imaginary'
|
||||
else:
|
||||
im = 0
|
||||
part = 'real'
|
||||
|
||||
if kwargs.get('b2b'):
|
||||
b2b = 1
|
||||
else:
|
||||
b2b = 0
|
||||
|
||||
T = 0
|
||||
ls = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
||||
ls.extend(dirnames)
|
||||
break
|
||||
if not ls:
|
||||
raise Exception('Error, directory not found')
|
||||
# Exclude folders with different names
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*'):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
if len(ls) > 1:
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
|
||||
replica = len(ls)
|
||||
if 'names' in kwargs:
|
||||
new_names = kwargs.get('names')
|
||||
if len(new_names) != replica:
|
||||
raise Exception('Names does not have the required length', replica)
|
||||
else:
|
||||
# Adjust replica names to new bookmarking system
|
||||
new_names = []
|
||||
for entry in ls:
|
||||
idx = entry.index('r')
|
||||
if 'ens_name' in kwargs:
|
||||
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
|
||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
||||
if not appended:
|
||||
ls.extend(dirnames)
|
||||
else:
|
||||
new_names.append(entry[:idx] + '|' + entry[idx:])
|
||||
|
||||
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
|
||||
for i, item in enumerate(ls):
|
||||
sub_ls = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
|
||||
sub_ls.extend(filenames)
|
||||
ls.extend(filenames)
|
||||
break
|
||||
for exc in sub_ls:
|
||||
if not ls:
|
||||
raise Exception('Error, directory not found')
|
||||
# Exclude folders with different names
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*'):
|
||||
sub_ls = list(set(sub_ls) - set([exc]))
|
||||
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
|
||||
ls = list(set(ls) - set([exc]))
|
||||
if len(ls) > 1:
|
||||
# New version, to cope with ids, etc.
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
|
||||
|
||||
first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
|
||||
if not appended:
|
||||
replica = len(ls)
|
||||
else:
|
||||
replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
|
||||
print('Read', part, 'part of', name, 'from', prefix[:-1],
|
||||
',', replica, 'replica')
|
||||
if 'names' in kwargs:
|
||||
new_names = kwargs.get('names')
|
||||
if len(new_names) != len(set(new_names)):
|
||||
raise Exception("names are not unique!")
|
||||
if len(new_names) != replica:
|
||||
raise Exception('Names does not have the required length', replica)
|
||||
else:
|
||||
# Adjust replica names to new bookmarking system
|
||||
|
||||
last_cfg = len(sub_ls) + first_cfg - 1
|
||||
new_names = []
|
||||
if not appended:
|
||||
for entry in ls:
|
||||
try:
|
||||
idx = entry.index('r')
|
||||
except Exception:
|
||||
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
|
||||
|
||||
for cfg in range(1, len(sub_ls)):
|
||||
if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
|
||||
last_cfg = cfg + first_cfg - 1
|
||||
break
|
||||
|
||||
no_cfg = last_cfg - first_cfg + 1
|
||||
print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
|
||||
|
||||
if i == 0:
|
||||
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
|
||||
if b2b:
|
||||
pattern += '\nwf_2 ' + str(wf2)
|
||||
|
||||
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
|
||||
content = file.read()
|
||||
match = re.search(pattern, content)
|
||||
if match:
|
||||
start_read = content.count('\n', 0, match.start()) + 5 + b2b
|
||||
end_match = re.search(r'\n\s*\n', content[match.start():])
|
||||
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
|
||||
assert T > 0
|
||||
print(T, 'entries, starting to read in line', start_read)
|
||||
if 'ens_name' in kwargs:
|
||||
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
|
||||
else:
|
||||
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
|
||||
new_names.append(entry[:idx] + '|' + entry[idx:])
|
||||
else:
|
||||
|
||||
deltas = []
|
||||
for j in range(T):
|
||||
deltas.append([])
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
|
||||
for entry in ls:
|
||||
myentry = entry[:-len(name) - 1]
|
||||
# print(myentry)
|
||||
try:
|
||||
idx = myentry.index('r')
|
||||
except Exception:
|
||||
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
|
||||
|
||||
sublength = no_cfg
|
||||
for j in range(T):
|
||||
deltas[j].append(np.zeros(sublength))
|
||||
if 'ens_name' in kwargs:
|
||||
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
|
||||
else:
|
||||
new_names.append(myentry[:idx] + '|' + myentry[idx:])
|
||||
# print(new_names)
|
||||
idl = []
|
||||
if not appended:
|
||||
for i, item in enumerate(ls):
|
||||
sub_ls = []
|
||||
if "files" in kwargs:
|
||||
sub_ls = kwargs.get("files")
|
||||
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
|
||||
else:
|
||||
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
|
||||
if compact:
|
||||
sub_ls.extend(filenames)
|
||||
else:
|
||||
sub_ls.extend(dirnames)
|
||||
break
|
||||
|
||||
for cfg in range(no_cfg):
|
||||
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
|
||||
for k, line in enumerate(fp):
|
||||
if k == start_read - 5 - b2b:
|
||||
if line.strip() != 'name ' + name:
|
||||
raise Exception('Wrong format', sub_ls[cfg])
|
||||
if(k >= start_read and k < start_read + T):
|
||||
floats = list(map(float, line.split()))
|
||||
deltas[k - start_read][i][cfg] = floats[-2:][im]
|
||||
# print(sub_ls)
|
||||
for exc in sub_ls:
|
||||
if compact:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*'):
|
||||
sub_ls = list(set(sub_ls) - set([exc]))
|
||||
sub_ls.sort(key=lambda x:
|
||||
int(re.findall(r'\d+', x)[-1]))
|
||||
else:
|
||||
if not fnmatch.fnmatch(exc, 'cfg*'):
|
||||
sub_ls = list(set(sub_ls) - set([exc]))
|
||||
sub_ls.sort(key=lambda x: int(x[3:]))
|
||||
# print(sub_ls)
|
||||
rep_idl = []
|
||||
no_cfg = len(sub_ls)
|
||||
for cfg in sub_ls:
|
||||
try:
|
||||
if compact:
|
||||
rep_idl.append(int(cfg.split("n")[-1]))
|
||||
else:
|
||||
rep_idl.append(int(cfg[3:]))
|
||||
except Exception:
|
||||
raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
|
||||
rep_idl.sort()
|
||||
# maybe there is a better way to print the idls
|
||||
print(item, ':', no_cfg, ' configurations')
|
||||
idl.append(rep_idl)
|
||||
# here we have found all the files we need to look into.
|
||||
if i == 0:
|
||||
# here, we want to find the place within the file,
|
||||
# where the correlator we need is stored.
|
||||
if compact:
|
||||
# to do so, the pattern needed is put together
|
||||
# from the input values
|
||||
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
|
||||
if b2b:
|
||||
pattern += '\nwf_2 ' + str(wf2)
|
||||
# and the file is parsed through to find the pattern
|
||||
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
|
||||
content = file.read()
|
||||
match = re.search(pattern, content)
|
||||
if match:
|
||||
# the start and end point of the correlator
|
||||
# in quaetion is extracted for later use in
|
||||
# the other files
|
||||
start_read = content.count('\n', 0, match.start()) + 5 + b2b
|
||||
end_match = re.search(r'\n\s*\n', content[match.start():])
|
||||
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
|
||||
assert T > 0
|
||||
print(T, 'entries, starting to read in line', start_read)
|
||||
else:
|
||||
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
|
||||
else:
|
||||
# this part does the same as above,
|
||||
# but for non-compactified versions of the files
|
||||
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
|
||||
for k, line in enumerate(fp):
|
||||
if version == "0.0":
|
||||
# check if this is really the right file
|
||||
# by matching pattern similar to above
|
||||
pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
|
||||
# if b2b, a second wf is needed
|
||||
if b2b:
|
||||
pattern += ", wf_2 " + str(wf2)
|
||||
qs = quarks.split(" ")
|
||||
pattern += " : " + qs[0] + " - " + qs[1]
|
||||
# print(pattern)
|
||||
if read == 1 and not line.strip() and k > start + 1:
|
||||
break
|
||||
if read == 1 and k >= start:
|
||||
T += 1
|
||||
|
||||
if version == "0.0":
|
||||
if pattern in line:
|
||||
# print(line)
|
||||
read = 1
|
||||
start = k + 1
|
||||
else:
|
||||
if '[correlator]' in line:
|
||||
read = 1
|
||||
start = k + 7 + b2b
|
||||
T -= b2b
|
||||
print(str(T) + " entries found.")
|
||||
# we found where the correlator
|
||||
# that is to be read is in the files
|
||||
# after preparing the datastructure
|
||||
# the correlators get parsed into...
|
||||
deltas = []
|
||||
for j in range(T):
|
||||
deltas.append([])
|
||||
|
||||
for t in range(T):
|
||||
deltas[t].append(np.zeros(no_cfg))
|
||||
# ...the actual parsing can start.
|
||||
# we iterate through all measurement files in the path given...
|
||||
if compact:
|
||||
for cfg in range(no_cfg):
|
||||
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
|
||||
lines = fp.readlines()
|
||||
# check, if the correlator is in fact
|
||||
# printed completely
|
||||
if(start_read + T > len(lines)):
|
||||
raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
|
||||
# and start to read the correlator.
|
||||
# the range here is chosen like this,
|
||||
# since this allows for implementing
|
||||
# a security check for every read correlator later...
|
||||
for k in range(start_read - 6, start_read + T):
|
||||
if k == start_read - 5 - b2b:
|
||||
if lines[k].strip() != 'name ' + name:
|
||||
raise Exception('Wrong format',
|
||||
sub_ls[cfg])
|
||||
if(k >= start_read and k < start_read + T):
|
||||
floats = list(map(float, lines[k].split()))
|
||||
deltas[k - start_read][i][cfg] = floats[-2:][im]
|
||||
else:
|
||||
for cnfg, subitem in enumerate(sub_ls):
|
||||
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
|
||||
# since the non-compatified files
|
||||
# are typically not so long,
|
||||
# we can iterate over the whole file.
|
||||
# here one can also implement the chekc from above.
|
||||
for k, line in enumerate(fp):
|
||||
if(k >= start and k < start + T):
|
||||
floats = list(map(float, line.split()))
|
||||
if version == "0.0":
|
||||
deltas[k - start][i][cnfg] = floats[im]
|
||||
else:
|
||||
deltas[k - start][i][cnfg] = floats[1 + im - single]
|
||||
|
||||
else:
|
||||
if "files" in kwargs:
|
||||
ls = kwargs.get("files")
|
||||
else:
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*.' + name):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
|
||||
# print(ls)
|
||||
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
|
||||
if b2b:
|
||||
pattern += '\nwf_2 ' + str(wf2)
|
||||
for rep, file in enumerate(ls):
|
||||
rep_idl = []
|
||||
with open(path + '/' + file, 'r') as fp:
|
||||
content = fp.readlines()
|
||||
data_starts = []
|
||||
for linenumber, line in enumerate(content):
|
||||
if "[run]" in line:
|
||||
data_starts.append(linenumber)
|
||||
if len(set([data_starts[i] - data_starts[i - 1] for i in
|
||||
range(1, len(data_starts))])) > 1:
|
||||
raise Exception("Irregularities in file structure found, not all runs have the same output length")
|
||||
# first chunk of data
|
||||
chunk = content[:data_starts[1]]
|
||||
for linenumber, line in enumerate(chunk):
|
||||
if line.startswith("gauge_name"):
|
||||
gauge_line = linenumber
|
||||
elif line.startswith("[correlator]"):
|
||||
corr_line = linenumber
|
||||
found_pat = ""
|
||||
for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
|
||||
found_pat += li
|
||||
if re.search(pattern, found_pat):
|
||||
start_read = corr_line + 7 + b2b
|
||||
T = len(chunk) - 1 - start_read
|
||||
if rep == 0:
|
||||
deltas = []
|
||||
for t in range(T):
|
||||
deltas.append([])
|
||||
for t in range(T):
|
||||
deltas[t].append(np.zeros(len(data_starts)))
|
||||
# all other chunks should follow the same structure
|
||||
for cnfg in range(len(data_starts)):
|
||||
start = data_starts[cnfg]
|
||||
stop = start + data_starts[1]
|
||||
chunk = content[start:stop]
|
||||
# meta_data = {}
|
||||
try:
|
||||
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
|
||||
except Exception:
|
||||
raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line)
|
||||
|
||||
found_pat = ""
|
||||
for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
|
||||
found_pat += li
|
||||
if re.search(pattern, found_pat):
|
||||
for t, line in enumerate(chunk[start_read:start_read + T]):
|
||||
floats = list(map(float, line.split()))
|
||||
deltas[t][rep][cnfg] = floats[-2:][im]
|
||||
idl.append(rep_idl)
|
||||
|
||||
if "check_configs" in kwargs:
|
||||
print("Checking for missing configs...")
|
||||
che = kwargs.get("check_configs")
|
||||
if not (len(che) == len(idl)):
|
||||
raise Exception("check_configs has to be the same length as replica!")
|
||||
for r in range(len(idl)):
|
||||
print("checking " + new_names[r])
|
||||
utils.check_idl(idl[r], che[r])
|
||||
print("Done")
|
||||
result = []
|
||||
for t in range(T):
|
||||
result.append(Obs(deltas[t], new_names))
|
||||
return result
|
||||
|
||||
|
||||
def read_qtop(path, prefix, **kwargs):
|
||||
"""Read qtop format from given folder structure.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
target -- specifies the topological sector to be reweighted to (default 0)
|
||||
full -- if true read the charge instead of the reweighting factor.
|
||||
"""
|
||||
|
||||
if 'target' in kwargs:
|
||||
target = kwargs.get('target')
|
||||
else:
|
||||
target = 0
|
||||
|
||||
if kwargs.get('full'):
|
||||
full = 1
|
||||
else:
|
||||
full = 0
|
||||
|
||||
ls = []
|
||||
for (dirpath, dirnames, filenames) in os.walk(path):
|
||||
ls.extend(filenames)
|
||||
break
|
||||
|
||||
if not ls:
|
||||
raise Exception('Error, directory not found')
|
||||
|
||||
# Exclude files with different names
|
||||
for exc in ls:
|
||||
if not fnmatch.fnmatch(exc, prefix + '*'):
|
||||
ls = list(set(ls) - set([exc]))
|
||||
if len(ls) > 1:
|
||||
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
|
||||
replica = len(ls)
|
||||
print('Read Q_top from', prefix[:-1], ',', replica, 'replica')
|
||||
|
||||
deltas = []
|
||||
|
||||
for rep in range(replica):
|
||||
tmp = []
|
||||
with open(path + '/' + ls[rep]) as fp:
|
||||
for k, line in enumerate(fp):
|
||||
floats = list(map(float, line.split()))
|
||||
if full == 1:
|
||||
tmp.append(floats[1])
|
||||
else:
|
||||
if int(floats[1]) == target:
|
||||
tmp.append(1.0)
|
||||
else:
|
||||
tmp.append(0.0)
|
||||
|
||||
deltas.append(np.array(tmp))
|
||||
|
||||
rep_names = []
|
||||
for entry in ls:
|
||||
truncated_entry = entry.split('.')[0]
|
||||
idx = truncated_entry.index('r')
|
||||
rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
|
||||
|
||||
result = Obs(deltas, rep_names)
|
||||
|
||||
result.append(Obs(deltas[t], new_names, idl=idl))
|
||||
return result
|
||||
|
|
15
pyerrors/input/utils.py
Normal file
15
pyerrors/input/utils.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
"""Utilities for the input"""
|
||||
|
||||
|
||||
def check_idl(idl, che):
|
||||
missing = []
|
||||
for c in che:
|
||||
if c not in idl:
|
||||
missing.append(c)
|
||||
# print missing such that it can directly be parsed to slurm terminal
|
||||
if not (len(missing) == 0):
|
||||
print(len(missing), "configs missing")
|
||||
miss_str = str(missing[0])
|
||||
for i in missing[1:]:
|
||||
miss_str += "," + str(i)
|
||||
print(miss_str)
|
Loading…
Add table
Add a link
Reference in a new issue