From 6c41810e8182e5d62e5037871d379d5587abf0cf Mon Sep 17 00:00:00 2001 From: Justus Date: Mon, 15 Nov 2021 15:55:26 +0100 Subject: [PATCH 01/17] added Qtop extraction for oQCD1.2 --- pyerrors/input/openQCD.py | 128 ++++++++++++++++++++++++++++++++++++++ pyerrors/input/sfcf.py | 63 ------------------- 2 files changed, 128 insertions(+), 63 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 5e1c8d49..2483baa9 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -343,3 +343,131 @@ def _read_array_openQCD2(fp): arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True) return {'d': d, 'n': n, 'size': size, 'arr': arr} + + +def read_qtop(path, prefix, version = "1.2",**kwargs): + """Read qtop format from given folder structure. + + Parameters + ---------- + target -- specifies the topological sector to be reweighted to (default 0) + full -- if true read the charge instead of the reweighting factor. + """ + dtr_cnfg = 4 + L = 20 + c = 0.35 + target = 0 + full = False + + if 'target' in kwargs: + target = kwargs.get('target') + + + if kwargs.get('full'): + full = True + + if "r_start" in kwargs: + r_start = kwargs.get("r_start") + if "r_stop" in kwargs: + r_stop = kwargs.get("r_stop") + #if one wants to read specific files with this method... + if "files" in kwargs: + files = kwargs.get("files") + else: + #find files in path + found = [] + files = [] + for (dirpath, dirnames, filenames) in os.walk(path+"/"): + #print(filenames) + found.extend(filenames) + break + for f in found: + if fnmatch.fnmatch(f, prefix+"*"+".ms.dat"): + files.append(f) + print(files) + #now that we found our files, we dechiffer them... + rep_names = [] + + deltas = [] + for rep,file in enumerate(files): + + with open(path+"/"+file, "rb") as fp: + #this, for now, is only for version 1.2 + #header + t = fp.read(12) + header = struct.unpack('iii', t) + dn = header[0] + nn = header[1] + tmax = header[2] + print('dn:', dn) + print('nn:', nn) + print('tmax:', tmax) + t = fp.read(8) + eps = struct.unpack('d', t)[0] + print('eps:', eps) + + Q = [] + i = 1 + while 0 < 1: + t = fp.read(4) + if(len(t) < 4): + break + nc = struct.unpack('i',t)[0] + if(nc != i): + print("WARNING: possible missing config:" +str(i)) + #raise Exception('Config missing?') + else: + t = fp.read(8 * tmax * (nn + 1)) + t = fp.read(8 * tmax * (nn + 1)) + t = fp.read(8 * tmax * (nn + 1)) + tmpd = struct.unpack('d' * tmax * (nn + 1), t) + Q.append(tmpd) + i += 1 + #print(tmp) + + print('max_t:', dn * (nn) * eps) + + t_aim = (c * L) ** 2 / 8 + + print('t_aim:', t_aim) + index_aim = round(t_aim / eps / dn) + print('index_aim:', index_aim) + + + Q_sum = [] + for i, item in enumerate(Q): + Q_sum.append([sum(item[current:current + tmax]) for current in range(0, len(item), tmax)]) + Q_round = [] + for i in range(len(Q) // dtr_cnfg): + Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim])) + + replica = len(files) + + tmp = [] + for q in Q_round: + #floats = list(map(float, line.split())) + if full: + tmp.append(q) #round(Q_sum[dtr_cnfg * i][index_aim]) + else: + if int(q) == target: #round(Q_sum[dtr_cnfg * i][index_aim]) + tmp.append(1.0) + else: + tmp.append(0.0) + + truncated_file = file[:-7] #as seen in previous examples, this could lead to some weird behaviour... maybe -7 fixes this. + print(truncated_file) + idx = truncated_file.index('r') + #print(truncated_file[idx:]) + # this might be a quite fishy way to find out which replicum we are actually talking about... 
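+        # Illustrative example, using the file names from the test notebook
+        # added in the next commit: for "T29L20k0.13719r2.ms.dat", file[:-7]
+        # strips ".ms.dat" and leaves "T29L20k0.13719r2"; idx then points at
+        # the 'r', so int(truncated_file[idx+1:]) - 1 == 1 selects the
+        # r_start/r_stop entry belonging to replicum r2.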
+ if "r_start" in kwargs: + tmp = tmp[r_start[int(truncated_file[idx+1:])-1]:] + if "r_stop" in kwargs: + tmp = tmp[:r_stop[int(truncated_file[idx+1:])-1]] + + rep_names.append(truncated_file[:idx] + '|' + truncated_file[idx:]) + + deltas.append(np.array(tmp)) + + + result = Obs(deltas, rep_names) + return result \ No newline at end of file diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index e48bdd16..706e26a9 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -229,66 +229,3 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg result.append(Obs(deltas[t], new_names)) return result - -def read_qtop(path, prefix, **kwargs): - """Read qtop format from given folder structure. - - Parameters - ---------- - target -- specifies the topological sector to be reweighted to (default 0) - full -- if true read the charge instead of the reweighting factor. - """ - - if 'target' in kwargs: - target = kwargs.get('target') - else: - target = 0 - - if kwargs.get('full'): - full = 1 - else: - full = 0 - - ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(filenames) - break - - if not ls: - raise Exception('Error, directory not found') - - # Exclude files with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set([exc])) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. - replica = len(ls) - print('Read Q_top from', prefix[:-1], ',', replica, 'replica') - - deltas = [] - - for rep in range(replica): - tmp = [] - with open(path + '/' + ls[rep]) as fp: - for k, line in enumerate(fp): - floats = list(map(float, line.split())) - if full == 1: - tmp.append(floats[1]) - else: - if int(floats[1]) == target: - tmp.append(1.0) - else: - tmp.append(0.0) - - deltas.append(np.array(tmp)) - - rep_names = [] - for entry in ls: - truncated_entry = entry.split('.')[0] - idx = truncated_entry.index('r') - rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:]) - - result = Obs(deltas, rep_names) - - return result From bb9bfb78d3b99fd1736b311b18c81ecb3815382e Mon Sep 17 00:00:00 2001 From: Justus Date: Mon, 15 Nov 2021 15:57:26 +0100 Subject: [PATCH 02/17] first Qtop input test --- tests/input_test.ipynb | 136 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 tests/input_test.ipynb diff --git a/tests/input_test.ipynb b/tests/input_test.ipynb new file mode 100644 index 00000000..f241304a --- /dev/null +++ b/tests/input_test.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This file is used for testing some of the input methods." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os,sys,inspect\n", + "current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n", + "parent_dir = os.path.dirname(current_dir)\n", + "sys.path.insert(0, parent_dir) \n", + "\n", + "import pyerrors as pe\n", + "import pyerrors.input.openQCD as qcdin\n", + "import pyerrors.input.sfcf as sfin\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we will have a look at the input method for the topological charge $Q_{top}$, which is measured by the program ms from the openQCD package. For now, this part still in the making and depends on an actual file. 
Later, this should be changed to a more efficient way of making a proper input file.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['T29L20k0.13719r2.ms.dat', 'T29L20k0.13719r3.ms.dat', 'T29L20k0.13719r1.ms.dat', 'T29L20k0.13719r4.ms.dat']\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r2\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r3\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r1\n", + "dn: 10\n", + "nn: 60\n", + "tmax: 30\n", + "eps: 0.02\n", + "max_t: 12.0\n", + "t_aim: 6.125\n", + "index_aim: 31\n", + "T29L20k0.13719r4\n" + ] + } + ], + "source": [ + "r_qtop = qcdin.read_qtop(\"../../test_data\", prefix = \"T29L20k0.13719\",full = True, r_stop = [500,440,447,410])#, files = [\"T29L20k0.13719r1.ms.dat\"], )" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'T29L20k0.13719|r1': 500, 'T29L20k0.13719|r2': 440, 'T29L20k0.13719|r3': 447, 'T29L20k0.13719|r4': 410}\n", + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -2 -2 -2 -2 -3 -3 -3 -3 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + ] + } + ], + "source": [ + "print(r_qtop.shape)\n", + "#print(r_qtop.deltas['T29L20k0.13719|r1'])\n", + "for i in r_qtop.deltas['T29L20k0.13719|r2']:\n", + " print(round(r_qtop.value + i), end =\" \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "interpreter": { + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + }, + "kernelspec": { + "display_name": "Python 3.9.7 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 66cdd46a92aac3fb5d955cc105b6c8c3deb85423 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 29 Nov 2021 13:13:13 +0100 Subject: [PATCH 03/17] first ver. 
of just one SFCF read method --- pyerrors/input/sfcf.py | 238 ++++++++++++++++++++++++++++------------- 1 file changed, 165 insertions(+), 73 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 706e26a9..5915c56e 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -8,11 +8,11 @@ import numpy as np # Thinly-wrapped numpy from ..obs import Obs -def read_sfcf(path, prefix, name, **kwargs): - """Read sfcf C format from given folder structure. +def read_sfcf_old(path, prefix, name, quarks, noffset = 0, wf=0, wf2=0, **kwargs): + """Read sfcf format (from around 2012) from given folder structure. - Parameters - ---------- + Keyword arguments + ----------------- im -- if True, read imaginary instead of real part of the correlation function. single -- if True, read a boundary-to-boundary correlation function with a single value b2b -- if True, read a time-dependent boundary-to-boundary correlation function @@ -24,17 +24,13 @@ def read_sfcf(path, prefix, name, **kwargs): else: im = 0 part = 'real' - - if kwargs.get('single'): - b2b = 1 - single = 1 - else: - b2b = 0 - single = 0 + + b2b = 0 if kwargs.get('b2b'): b2b = 1 - + + quarks = quarks.split(" ") read = 0 T = 0 start = 0 @@ -43,7 +39,8 @@ def read_sfcf(path, prefix, name, **kwargs): ls.extend(dirnames) break if not ls: - raise Exception('Error, directory not found') + print('Error, directory not found') + #sys.exit() for exc in ls: if fnmatch.fnmatch(exc, prefix + '*'): ls = list(set(ls) - set(exc)) @@ -56,17 +53,12 @@ def read_sfcf(path, prefix, name, **kwargs): if len(new_names) != replica: raise Exception('Names does not have the required length', replica) else: - # Adjust replica names to new bookmarking system - new_names = [] - for entry in ls: - idx = entry.index('r') - new_names.append(entry[:idx] + '|' + entry[idx:]) - + new_names = ls print(replica, 'replica') for i, item in enumerate(ls): print(item) sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + for (dirpath, dirnames, filenames) in os.walk(path+'/'+item): sub_ls.extend(dirnames) break for exc in sub_ls: @@ -75,18 +67,25 @@ def read_sfcf(path, prefix, name, **kwargs): sub_ls.sort(key=lambda x: int(x[3:])) no_cfg = len(sub_ls) print(no_cfg, 'configurations') - if i == 0: with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: for k, line in enumerate(fp): + #check if this is really the right file + pattern = "# "+name+" : offset "+str(noffset)+", wf "+"0" + #if b2b, a second wf is needed + if b2b: + pattern+=", wf_2 "+"0" + pattern+=" : "+quarks[0]+" - "+quarks[1] + if read == 1 and not line.strip() and k > start + 1: break if read == 1 and k >= start: T += 1 - if '[correlator]' in line: + if pattern in line: + #print(line) read = 1 - start = k + 7 + b2b - T -= b2b + start = k+1 + print(str(T)+" entries found.") deltas = [] for j in range(T): @@ -97,11 +96,12 @@ def read_sfcf(path, prefix, name, **kwargs): deltas[j].append(np.zeros(sublength)) for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + with open(path + '/' + item + '/' + subitem + '/'+name) as fp: for k, line in enumerate(fp): if(k >= start and k < start + T): floats = list(map(float, line.split())) - deltas[k - start][i][cnfg] = floats[1 + im - single] + deltas[k-start][i][cnfg] = floats[im] + result = [] for t in range(T): @@ -110,7 +110,7 @@ def read_sfcf(path, prefix, name, **kwargs): return result -def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, 
**kwargs): +def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): """Read sfcf c format from given folder structure. Parameters @@ -121,11 +121,11 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions) im -- if True, read imaginary instead of real part of the correlation function. b2b -- if True, read a time-dependent boundary-to-boundary correlation function + single -- if True, read time independent boundary to boundary correlation function names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length ens_name : str replaces the name of the ensemble """ - if kwargs.get('im'): im = 1 part = 'imaginary' @@ -133,12 +133,38 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg im = 0 part = 'real' - if kwargs.get('b2b'): + if kwargs.get('single'): b2b = 1 + single = 1 else: - b2b = 0 + if kwargs.get('b2b'): + b2b = 1 + else: + b2b = 0 + single = 0 + files = [] + if "files" in kwargs: + files = kwargs.get("files") + + #due to higher usage in current projects, compact file format is default + compact = True + #get version string + version = "1.0" + known_versions = ["0.0","1.0","2.0","1.0c","2.0c"] + if "version" in kwargs: + version = kwargs.get("version") + if not version in known_versions: + raise Exception("This version is not known!") + #if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c) + if(version[-1] == "c"): + compact = True + version = version[:-1] + else: + compact = False + read = 0 T = 0 + start = 0 ls = [] for (dirpath, dirnames, filenames) in os.walk(path): ls.extend(dirnames) @@ -146,12 +172,17 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg if not ls: raise Exception('Error, directory not found') # Exclude folders with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set([exc])) + if len(files) != 0: + ls = files + else: + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*'): + ls = list(set(ls) - set([exc])) if len(ls) > 1: ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. 
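     # e.g. with prefix "data", replica folders "data_r1", "data_r2", ...,
     # "data_r10" are ordered by the first integer found after the prefix, so
     # r10 comes after r2 instead of sorting lexicographically (illustrative
     # folder names)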
replica = len(ls) + print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') + if 'names' in kwargs: new_names = kwargs.get('names') if len(new_names) != replica: @@ -160,52 +191,99 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg # Adjust replica names to new bookmarking system new_names = [] for entry in ls: - idx = entry.index('r') + try: + idx = entry.index('r') + except: + idx = len(entry)-2 if 'ens_name' in kwargs: new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) else: new_names.append(entry[:idx] + '|' + entry[idx:]) - - print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') for i, item in enumerate(ls): sub_ls = [] for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - sub_ls.extend(filenames) + if compact: + sub_ls.extend(filenames) + else: + sub_ls.extend(dirnames) break - for exc in sub_ls: - if not fnmatch.fnmatch(exc, prefix + '*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + + #print(sub_ls) + for exc in sub_ls: + if compact: + if not fnmatch.fnmatch(exc, prefix + '*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + else: + if not fnmatch.fnmatch(exc, 'cfg*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(x[3:])) + + if compact: + first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) - first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) + last_cfg = len(sub_ls) + first_cfg - 1 - last_cfg = len(sub_ls) + first_cfg - 1 + for cfg in range(1, len(sub_ls)): + if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: + last_cfg = cfg + first_cfg - 1 + break - for cfg in range(1, len(sub_ls)): - if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: - last_cfg = cfg + first_cfg - 1 - break - - no_cfg = last_cfg - first_cfg + 1 - print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') + no_cfg = last_cfg - first_cfg + 1 + print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') + else: + no_cfg = len(sub_ls) + print(no_cfg, 'configurations') + #here we have found all the files we need to look into. 
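+        # each compact file is expected to contain a header block of the form
+        # (illustrative correlator and quark names)
+        #   name f_A
+        #   quarks lquark lquark
+        #   offset 0
+        #   wf 0
+        # followed by the correlator values; the pattern assembled below is
+        # matched against exactly this block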
if i == 0: - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) - if b2b: - pattern += '\nwf_2 ' + str(wf2) + if compact: + + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) - with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: - content = file.read() - match = re.search(pattern, content) - if match: - start_read = content.count('\n', 0, match.start()) + 5 + b2b - end_match = re.search(r'\n\s*\n', content[match.start():]) - T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b - assert T > 0 - print(T, 'entries, starting to read in line', start_read) - else: - raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: + content = file.read() + match = re.search(pattern, content) + if match: + start_read = content.count('\n', 0, match.start()) + 5 + b2b + end_match = re.search(r'\n\s*\n', content[match.start():]) + T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b + assert T > 0 + print(T, 'entries, starting to read in line', start_read) + else: + raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + else: + #print(path + '/' + item + '/')# + sub_ls[0] + '/' + name) + with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: + for k, line in enumerate(fp): + if version == "0.0": + #check if this is really the right file + pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) + #if b2b, a second wf is needed + if b2b: + pattern+=", wf_2 "+str(wf2) + qs = quarks.split(" ") + pattern+=" : "+qs[0]+" - "+qs[1] + #print(pattern) + if read == 1 and not line.strip() and k > start + 1: + break + if read == 1 and k >= start: + T += 1 + if version == "0.0": + if pattern in line: + #print(line) + read = 1 + start = k+1 + else: + if '[correlator]' in line: + read = 1 + start = k + 7 + b2b + T -= b2b + print(str(T)+" entries found.") + #we found where the correlator that is to be read is in the files deltas = [] for j in range(T): deltas.append([]) @@ -213,16 +291,30 @@ def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwarg sublength = no_cfg for j in range(T): deltas[j].append(np.zeros(sublength)) + if compact: + for cfg in range(no_cfg): + with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: + lines = fp.readlines() + if(start_read + T>len(lines)): + raise Exception("EOF before end of correlator data! 
Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") + for k in range(start_read - 6,start_read + T): + if k == start_read - 5 - b2b: + if lines[k].strip() != 'name ' + name: + raise Exception('Wrong format', sub_ls[cfg]) + if(k >= start_read and k < start_read + T): + floats = list(map(float, lines[k].split())) + deltas[k - start_read][i][cfg] = floats[-2:][im] + else: + for cnfg, subitem in enumerate(sub_ls): + with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + for k, line in enumerate(fp): + if(k >= start and k < start + T): + floats = list(map(float, line.split())) + if version == "0.0": + deltas[k-start][i][cnfg] = floats[im] + else: + deltas[k - start][i][cnfg] = floats[1 + im - single] - for cfg in range(no_cfg): - with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: - for k, line in enumerate(fp): - if k == start_read - 5 - b2b: - if line.strip() != 'name ' + name: - raise Exception('Wrong format', sub_ls[cfg]) - if(k >= start_read and k < start_read + T): - floats = list(map(float, line.split())) - deltas[k - start_read][i][cfg] = floats[-2:][im] result = [] for t in range(T): From efa8d8a91d8e86076a8c0c89d11010fb8dd9acbb Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Wed, 15 Dec 2021 12:00:11 +0100 Subject: [PATCH 04/17] beta version of the openQCD.py input method --- pyerrors/input/openQCD.py | 150 +++++++++++++++++++++++++++++--------- 1 file changed, 114 insertions(+), 36 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 2483baa9..f11fb4e6 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -345,27 +345,71 @@ def _read_array_openQCD2(fp): return {'d': d, 'n': n, 'size': size, 'arr': arr} -def read_qtop(path, prefix, version = "1.2",**kwargs): +def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): """Read qtop format from given folder structure. Parameters ---------- - target -- specifies the topological sector to be reweighted to (default 0) - full -- if true read the charge instead of the reweighting factor. + path: + path of the measurement files + prefix: + prefix of the measurement files, e.g. _id0_r0.ms.dat + c: + ??? + dtr_cnfg: + ??? + target: int + specifies the topological sector to be reweighted to (default 0) + full: bool + if true read the charge instead of the reweighting factor. + version: str + version string of the openQCD (sfqcd) version used to create the ensemble + steps: int + step size of measurements + L: int + spatial length of the lattice in L/a. HAS to be set if version != sfqcd, since openQCD does not provide this in the header + r_start: list + offset of the first ensemble, making it easier to match later on with other Obs + r_stop: list + last ensemble that needs to be read + r_meas_start: list + offset of the first measured ensemble, if there is any + files: list + specify the exact files that need to be read from path, pratical if e.g. only one replicum is needed + names: list + Alternative labeling for replicas/ensembles. Has to have the appropriate length """ - dtr_cnfg = 4 - L = 20 - c = 0.35 + #dtr_cnfg = 4# was ist das denn hier? 
+ #one could read L from the header in case of sfQCD + #c = 0.35 + known_versions = ["1.0","1.2","1.4","1.6","2.0", "sfqcd"] + version = "1.2" + if "version" in kwargs: + version = kwargs.get("version") + if not version in known_versions: + raise Exception("Unknown openQCD version.") target = 0 full = False - + if "steps" in kwargs: + steps = kwargs.get("steps") + else: + steps = 1 if 'target' in kwargs: target = kwargs.get('target') - - + if version == "sfqcd": + if "L" in kwargs: + supposed_L = kwargs.get("L") + else: + if not "L" in kwargs: + raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") + else: + L = kwargs.get("L") if kwargs.get('full'): full = True - + r_start = 1 + r_meas_start = 1 + if "r_meas_start" in kwargs: + r_meas_start = kwargs.get("r_meas_start") if "r_start" in kwargs: r_start = kwargs.get("r_start") if "r_stop" in kwargs: @@ -392,41 +436,62 @@ def read_qtop(path, prefix, version = "1.2",**kwargs): for rep,file in enumerate(files): with open(path+"/"+file, "rb") as fp: - #this, for now, is only for version 1.2 + #this, for now, is for version 1.2,1.4,1.6 and 2.0, but needs to be tested for the last 3, isncethe doc says its the same #header t = fp.read(12) - header = struct.unpack('iii', t) - dn = header[0] - nn = header[1] - tmax = header[2] - print('dn:', dn) + header = struct.unpack(' Date: Fri, 17 Dec 2021 12:08:08 +0100 Subject: [PATCH 05/17] hotfix, missing kwarg files in read_rwms method --- pyerrors/input/openQCD.py | 16 +++++++++------- pyerrors/input/utils.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 pyerrors/input/utils.py diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index f11fb4e6..08bb0223 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): if not ls: raise Exception('Error, directory not found') - - # Exclude files with different names - for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'): - ls = list(set(ls) - set([exc])) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) + if 'files' in kwargs: + ls = kwargs.get('files') + else: + # Exclude files with different names + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'): + ls = list(set(ls) - set([exc])) + if len(ls) > 1: + ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) replica = len(ls) if 'r_start' in kwargs: diff --git a/pyerrors/input/utils.py b/pyerrors/input/utils.py new file mode 100644 index 00000000..f4264587 --- /dev/null +++ b/pyerrors/input/utils.py @@ -0,0 +1,15 @@ +import fnmatch + +def check_missing(idl,che): + missing = [] + for ind in che: + if not ind in idl: + missing.append(ind) + if(len(missing) == 0): + print("There are no measurements missing.") + else: + print(len(missing),"measurements missing") + miss_str = str(missing[0]) + for i in missing[1:]: + miss_str += ","+str(i) + print(miss_str) From c5292f8342469c731854c81005cf235ed1288222 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 17 Dec 2021 15:16:17 +0100 Subject: [PATCH 06/17] implemented idl into sfcf-read method --- pyerrors/input/sfcf.py | 255 +++++++++++++++------------------------- pyerrors/input/utils.py | 17 ++- 2 files changed, 105 insertions(+), 167 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 5915c56e..8ba9a3da 100644 --- 
a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -6,125 +6,41 @@ import fnmatch import re import numpy as np # Thinly-wrapped numpy from ..obs import Obs - - -def read_sfcf_old(path, prefix, name, quarks, noffset = 0, wf=0, wf2=0, **kwargs): - """Read sfcf format (from around 2012) from given folder structure. - - Keyword arguments - ----------------- - im -- if True, read imaginary instead of real part of the correlation function. - single -- if True, read a boundary-to-boundary correlation function with a single value - b2b -- if True, read a time-dependent boundary-to-boundary correlation function - names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length - """ - if kwargs.get('im'): - im = 1 - part = 'imaginary' - else: - im = 0 - part = 'real' - - b2b = 0 - - if kwargs.get('b2b'): - b2b = 1 - - quarks = quarks.split(" ") - read = 0 - T = 0 - start = 0 - ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(dirnames) - break - if not ls: - print('Error, directory not found') - #sys.exit() - for exc in ls: - if fnmatch.fnmatch(exc, prefix + '*'): - ls = list(set(ls) - set(exc)) - if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) - replica = len(ls) - print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica') - if 'names' in kwargs: - new_names = kwargs.get('names') - if len(new_names) != replica: - raise Exception('Names does not have the required length', replica) - else: - new_names = ls - print(replica, 'replica') - for i, item in enumerate(ls): - print(item) - sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path+'/'+item): - sub_ls.extend(dirnames) - break - for exc in sub_ls: - if fnmatch.fnmatch(exc, 'cfg*'): - sub_ls = list(set(sub_ls) - set(exc)) - sub_ls.sort(key=lambda x: int(x[3:])) - no_cfg = len(sub_ls) - print(no_cfg, 'configurations') - if i == 0: - with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: - for k, line in enumerate(fp): - #check if this is really the right file - pattern = "# "+name+" : offset "+str(noffset)+", wf "+"0" - #if b2b, a second wf is needed - if b2b: - pattern+=", wf_2 "+"0" - pattern+=" : "+quarks[0]+" - "+quarks[1] - - if read == 1 and not line.strip() and k > start + 1: - break - if read == 1 and k >= start: - T += 1 - if pattern in line: - #print(line) - read = 1 - start = k+1 - print(str(T)+" entries found.") - - deltas = [] - for j in range(T): - deltas.append([]) - - sublength = len(sub_ls) - for j in range(T): - deltas[j].append(np.zeros(sublength)) - - for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/'+name) as fp: - for k, line in enumerate(fp): - if(k >= start and k < start + T): - floats = list(map(float, line.split())) - deltas[k-start][i][cnfg] = floats[im] - - - result = [] - for t in range(T): - result.append(Obs(deltas[t], new_names)) - - return result - +from . import utils def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): """Read sfcf c format from given folder structure. Parameters ---------- - quarks -- Label of the quarks used in the sfcf input file - noffset -- Offset of the source (only relevant when wavefunctions are used) - wf -- ID of wave function - wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions) - im -- if True, read imaginary instead of real part of the correlation function. 
- b2b -- if True, read a time-dependent boundary-to-boundary correlation function - single -- if True, read time independent boundary to boundary correlation function - names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length + quarks: str + Label of the quarks used in the sfcf input file. e.g. "quark quark" + for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file, + this is done automatically for this version + noffset: int + Offset of the source (only relevant when wavefunctions are used) + wf: int + ID of wave function + wf2: int + ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions) + im: bool + if True, read imaginary instead of real part of the correlation function. + b2b: bool + if True, read a time-dependent boundary-to-boundary correlation function + single: bool + if True, read time independent boundary to boundary correlation function + names: list + Alternative labeling for replicas/ensembles. Has to have the appropriate length ens_name : str replaces the name of the ensemble + version: str + version of SFCF, with which the measurement was done. if the compact output option (-c) was spectified, append a c to the version (e.g. "1.0c") + replica: list + list of replica to be read, default is all + files: list + list of files to be read per replica, default is all. for non-conpact ouztput format, hand the folders to be read here. + check_configs: + list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs """ if kwargs.get('im'): im = 1 @@ -142,8 +58,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) else: b2b = 0 single = 0 - - files = [] + if "replica" in kwargs: + reps = kwargs.get("replica") if "files" in kwargs: files = kwargs.get("files") @@ -172,8 +88,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) if not ls: raise Exception('Error, directory not found') # Exclude folders with different names - if len(files) != 0: - ls = files + if "replica" in kwargs: + ls = reps else: for exc in ls: if not fnmatch.fnmatch(exc, prefix + '*'): @@ -182,9 +98,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. 
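+    # at this point ls holds either the replica directories found by the
+    # prefix scan or the list passed via the "replica" kwarg, e.g.
+    # replica=["data_r0", "data_r1"] (illustrative names)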
replica = len(ls) print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') - + idl = [] if 'names' in kwargs: new_names = kwargs.get('names') + if len(new_names)!=len(set(new_names)): + raise Exception("names are nor unique!") if len(new_names) != replica: raise Exception('Names does not have the required length', replica) else: @@ -194,59 +112,65 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) try: idx = entry.index('r') except: - idx = len(entry)-2 + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") + if 'ens_name' in kwargs: new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) else: new_names.append(entry[:idx] + '|' + entry[idx:]) for i, item in enumerate(ls): sub_ls = [] - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - if compact: - sub_ls.extend(filenames) - else: - sub_ls.extend(dirnames) - break - - #print(sub_ls) - for exc in sub_ls: - if compact: - if not fnmatch.fnmatch(exc, prefix + '*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) - else: - if not fnmatch.fnmatch(exc, 'cfg*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(x[3:])) - - if compact: - first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1]) - - last_cfg = len(sub_ls) + first_cfg - 1 - - for cfg in range(1, len(sub_ls)): - if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg: - last_cfg = cfg + first_cfg - 1 - break - - no_cfg = last_cfg - first_cfg + 1 - print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n') + if "files" in kwargs: + sub_ls = kwargs.get("files") + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) else: - no_cfg = len(sub_ls) - print(no_cfg, 'configurations') - - #here we have found all the files we need to look into. + for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + if compact: + sub_ls.extend(filenames) + else: + sub_ls.extend(dirnames) + break + + #print(sub_ls) + for exc in sub_ls: + if compact: + if not fnmatch.fnmatch(exc, prefix + '*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + else: + if not fnmatch.fnmatch(exc, 'cfg*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(x[3:])) + #print(sub_ls) + rep_idl = [] + no_cfg = len(sub_ls) + for cfg in sub_ls: + try: + if compact: + rep_idl.append(int(cfg.split("n")[-1])) + else: + rep_idl.append(int(cfg[3:])) + except: + raise Exception("Couldn't parse idl from directroy, problem with file "+cfg) + rep_idl.sort() + #maybe there is a better way to print the idls + print(item, ':', no_cfg, ' configurations') + idl.append(rep_idl) + #here we have found all the files we need to look into. if i == 0: + #here, we want to find the place within the file, where the correlator we need is stored. 
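+            # this search is only performed for the first replicum (i == 0);
+            # the resulting start_read and T are reused for all other files,
+            # which are assumed to share the same layout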
+ if compact: - + #to do so, the pattern needed is put together from the input values pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) if b2b: pattern += '\nwf_2 ' + str(wf2) - + #and the file is parsed through to find the pattern with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: content = file.read() match = re.search(pattern, content) if match: + #the start and end point of the correlator in quaetion is extracted for later use in the other files start_read = content.count('\n', 0, match.start()) + 5 + b2b end_match = re.search(r'\n\s*\n', content[match.start():]) T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b @@ -255,11 +179,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) else: raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') else: - #print(path + '/' + item + '/')# + sub_ls[0] + '/' + name) + #this part does the same as above, but for non-compactified versions of the files with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: for k, line in enumerate(fp): if version == "0.0": - #check if this is really the right file + #check if this is really the right file by matchin pattern similar to above pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) #if b2b, a second wf is needed if b2b: @@ -284,19 +208,24 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) T -= b2b print(str(T)+" entries found.") #we found where the correlator that is to be read is in the files + #after preparing the datastructure the correlators get parsed into... deltas = [] for j in range(T): deltas.append([]) - + sublength = no_cfg for j in range(T): deltas[j].append(np.zeros(sublength)) + #... the actual parsing can start. we iterate through all measurement files in the path given... if compact: for cfg in range(no_cfg): with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: lines = fp.readlines() + #check, if the correlator is in fact printed completely if(start_read + T>len(lines)): raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") + #and start to read the correlator. + #the range here is chosen like this, since this allows for implementing a security check for every read correlator later... for k in range(start_read - 6,start_read + T): if k == start_read - 5 - b2b: if lines[k].strip() != 'name ' + name: @@ -307,6 +236,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) else: for cnfg, subitem in enumerate(sub_ls): with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + #since the non-compatified files are typically not so long, we can iterate over the whole file. + #here one can also implement the chekc from above. 
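+                    # ("the check from above" refers to the 'name' header
+                    # comparison the compact branch performs at line
+                    # start_read - 5 - b2b of each file)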
for k, line in enumerate(fp): if(k >= start and k < start + T): floats = list(map(float, line.split())) @@ -315,9 +246,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) else: deltas[k - start][i][cnfg] = floats[1 + im - single] - + if "check_configs" in kwargs: + print("Chekcing for missing configs...") + che = kwargs.get("check_configs") + if not (len(che) == len(idl)): + raise Exception("check_configs has to be the same length as replica!") + for r in range(len(idl)): + print("checking "+new_names[r]) + utils.check_idl(idl[r], che[r]) + print("Done") result = [] for t in range(T): - result.append(Obs(deltas[t], new_names)) + result.append(Obs(deltas[t], new_names, idl = idl)) return result diff --git a/pyerrors/input/utils.py b/pyerrors/input/utils.py index f4264587..a8dd026e 100644 --- a/pyerrors/input/utils.py +++ b/pyerrors/input/utils.py @@ -1,14 +1,13 @@ -import fnmatch +"""Utilities for the input""" -def check_missing(idl,che): +def check_idl(idl,che): missing = [] - for ind in che: - if not ind in idl: - missing.append(ind) - if(len(missing) == 0): - print("There are no measurements missing.") - else: - print(len(missing),"measurements missing") + for c in che: + if not c in idl: + missing.append(c) + #print missing such that it can directly be parsed to slurm terminal + if not (len(missing) == 0): + print(len(missing),"configs missing") miss_str = str(missing[0]) for i in missing[1:]: miss_str += ","+str(i) From b55e410dcf27760c1709bcde67ca13df52cc2fa0 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 17 Dec 2021 15:20:04 +0100 Subject: [PATCH 07/17] input/__init__ edited to include utils --- pyerrors/input/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyerrors/input/__init__.py b/pyerrors/input/__init__.py index 2797841c..23948b4c 100644 --- a/pyerrors/input/__init__.py +++ b/pyerrors/input/__init__.py @@ -4,3 +4,4 @@ from . import json from . import misc from . import openQCD from . import sfcf +from . import utils From 01ada964b271d583e0e78f37157aacc5516616e9 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 3 Jan 2022 11:20:25 +0100 Subject: [PATCH 08/17] added read_qtop_sector method outsourcing funtionality of former 'full' key --- pyerrors/input/openQCD.py | 107 +++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 08bb0223..f0b3a3df 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -8,6 +8,7 @@ import struct import numpy as np # Thinly-wrapped numpy from ..obs import Obs from ..fits import fit_lin +from . import utils def read_rwms(path, prefix, version='2.0', names=None, **kwargs): @@ -347,7 +348,7 @@ def _read_array_openQCD2(fp): return {'d': d, 'n': n, 'size': size, 'arr': arr} -def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): +def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): """Read qtop format from given folder structure. Parameters @@ -356,14 +357,12 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): path of the measurement files prefix: prefix of the measurement files, e.g. _id0_r0.ms.dat - c: - ??? - dtr_cnfg: - ??? - target: int - specifies the topological sector to be reweighted to (default 0) - full: bool - if true read the charge instead of the reweighting factor. 
+ c: double + Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L + dtr_cnfg: int + (optional) parameter that specifies the number of trajectories between two configs. + if it is not set, the distance between two measurements in the file is assumed to be + the distance between two configurations. version: str version string of the openQCD (sfqcd) version used to create the ensemble steps: int @@ -373,7 +372,7 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): r_start: list offset of the first ensemble, making it easier to match later on with other Obs r_stop: list - last ensemble that needs to be read + last configurations that need to be read (per replicum) r_meas_start: list offset of the first measured ensemble, if there is any files: list @@ -385,13 +384,10 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): #one could read L from the header in case of sfQCD #c = 0.35 known_versions = ["1.0","1.2","1.4","1.6","2.0", "sfqcd"] - version = "1.2" - if "version" in kwargs: - version = kwargs.get("version") - if not version in known_versions: - raise Exception("Unknown openQCD version.") + + if not version in known_versions: + raise Exception("Unknown openQCD version.") target = 0 - full = False if "steps" in kwargs: steps = kwargs.get("steps") else: @@ -406,8 +402,6 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") else: L = kwargs.get("L") - if kwargs.get('full'): - full = True r_start = 1 r_meas_start = 1 if "r_meas_start" in kwargs: @@ -445,8 +439,6 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1,**kwargs): dn = header[0] # step size in integration steps "dnms" nn = header[1] # number of measurements, so "ntot"/dn tmax = header[2]# lattice T/a - #hier fehlen die L/a Angaben im header von Simon - #also muss man L nur für den fall von Fabian setzen if version == "sfqcd": t = fp.read(12) Ls = struct.unpack(' Date: Mon, 3 Jan 2022 14:40:12 +0100 Subject: [PATCH 09/17] read_qtop now also hands over idl of the result Obs --- pyerrors/input/openQCD.py | 92 +++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index f0b3a3df..5c44fd2f 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -363,10 +363,11 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): (optional) parameter that specifies the number of trajectories between two configs. if it is not set, the distance between two measurements in the file is assumed to be the distance between two configurations. + steps: int + (optional) (maybe only necessary for openQCD2.0) + nt step size, guessed if not given version: str version string of the openQCD (sfqcd) version used to create the ensemble - steps: int - step size of measurements L: int spatial length of the lattice in L/a. HAS to be set if version != sfqcd, since openQCD does not provide this in the header r_start: list @@ -380,7 +381,6 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): names: list Alternative labeling for replicas/ensembles. Has to have the appropriate length """ - #dtr_cnfg = 4# was ist das denn hier? 
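    # e.g. c = 0.35 on a lattice with L/a = 20 gives
    # t_aim = (0.35 * 20)**2 / 8 = 6.125, the flow time at which the
    # topological charge is evaluated (numbers as printed by the test
    # notebook above)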
#one could read L from the header in case of sfQCD #c = 0.35 known_versions = ["1.0","1.2","1.4","1.6","2.0", "sfqcd"] @@ -390,8 +390,7 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): target = 0 if "steps" in kwargs: steps = kwargs.get("steps") - else: - steps = 1 + if 'target' in kwargs: target = kwargs.get('target') if version == "sfqcd": @@ -429,6 +428,7 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): rep_names = [] deltas = [] + idl = [] for rep,file in enumerate(files): with open(path+"/"+file, "rb") as fp: @@ -457,30 +457,34 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): print('eps:', eps) Q = [] - - i = r_meas_start*steps + ncs = [] while 0 < 1: t = fp.read(4) #int nt if(len(t) < 4): break - nc = struct.unpack('i',t)[0] - if(nc != i): - print(nc) - raise Exception('Config ' + str(i) + ' missing?') - else: - t = fp.read(8 * tmax * (nn + 1))#Wsl - t = fp.read(8 * tmax * (nn + 1))#Ysl - t = fp.read(8 * tmax * (nn + 1))#Qsl, which is asked for in this method - #unpack the array of Qtops, on each timeslice t=0,...,tmax-1 and the - #measurement number in = 0...nn (see README.qcd1) - tmpd = struct.unpack('d' * tmax * (nn + 1), t) - Q.append(tmpd) - i += 1*steps + ncs.append(struct.unpack('i',t)[0]) + t = fp.read(8 * tmax * (nn + 1))#Wsl + t = fp.read(8 * tmax * (nn + 1))#Ysl + t = fp.read(8 * tmax * (nn + 1))#Qsl, which is asked for in this method + #unpack the array of Qtops, on each timeslice t=0,...,tmax-1 and the + #measurement number in = 0...nn (see README.qcd1) + tmpd = struct.unpack('d' * tmax * (nn + 1), t) + Q.append(tmpd) #set step by reading all entries, then set stepsize, then check if everything is there #make a dtr_config param, which is checked against difference... #difference != step - #len(set(difference)) == 1 - #!!!also implement the idl stuff for everything... + + if not len(set([ncs[i]-ncs[i-1] for i in range(1,len(ncs))])): + raise Exception("Irregularities in stepsize found") + else: + if 'steps' in kwargs: + if steps != ncs[1]-ncs[0]: + raise Exception("steps and the found stepsize are not the same") + else: + steps = ncs[1]-ncs[0] + if ncs[0]//steps == ncs[0]/steps: + r_meas_start = ncs[0]//steps + print(len(Q)) print('max_t:', dn * (nn) * eps) @@ -499,36 +503,40 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): Q_round = [] for i in range(len(Q) // dtr_cnfg): Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim])) - - replica = len(files) - - truncated_file = file[:-7] #as seen in previous examples, this could lead to some weird behaviour... maybe -7 fixes this. - print(truncated_file) - try: - idx = truncated_file.index('r') - except: - if not "names" in kwargs: - raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") + if len(Q_round) != len(ncs)//dtr_cnfg: + raise Exception("qtops and ncs dont have the same length") + + #replica = len(files) + + truncated_file = file[:-7] + print(truncated_file) + idl_start = 1 - # this might be a quite fishy way to find out which replicum we are actually talking about... 
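+        # e.g. r_start=[10] with 510 entries in Q_round leaves 500 of them
+        # and gives idl = range(10, 510) for this replicum (illustrative
+        # numbers)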
if "r_start" in kwargs: - Q_round = Q_round[r_start[int(truncated_file[idx+1:])-1]:] + Q_round = Q_round[r_start[rep]:] + idl_start = r_start[rep] if "r_stop" in kwargs: - Q_round = Q_round[:r_stop[int(truncated_file[idx+1:])-1]] - if "ens_name" in kwargs: - ens_name = kwargs.get("ens_name") - else: - ens_name = truncated_file[:idx] + Q_round = Q_round[:r_stop[rep]] + idl_stop = idl_start+len(Q_round) #keyword "names" prevails over "ens_name" if not "names" in kwargs: + try: + idx = truncated_file.index('r') + except: + if not "names" in kwargs: + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") + if "ens_name" in kwargs: + ens_name = kwargs.get("ens_name") + else: + ens_name = truncated_file[:idx] rep_names.append(ens_name + '|' + truncated_file[idx:]) else: names = kwargs.get("names") rep_names = names deltas.append(np.array(Q_round)) - - - result = Obs(deltas, rep_names) + idl.append(range(idl_start,idl_stop)) + #print(idl) + result = Obs(deltas, rep_names, idl = idl) return result def read_qtop_sector(target = 0, **kwargs): From b8b1d3612556e1bfdb06526d0c79f2a03cfe2b03 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 3 Jan 2022 14:41:14 +0100 Subject: [PATCH 10/17] small bug fixes after first pull request --- pyerrors/input/sfcf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 8ba9a3da..6382e5f0 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -8,7 +8,7 @@ import numpy as np # Thinly-wrapped numpy from ..obs import Obs from . import utils -def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs): +def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0", **kwargs): """Read sfcf c format from given folder structure. Parameters @@ -66,8 +66,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) #due to higher usage in current projects, compact file format is default compact = True #get version string - version = "1.0" - known_versions = ["0.0","1.0","2.0","1.0c","2.0c"] + known_versions = ["0.0","1.0","2.0","1.0c","2.0c","1.0a","2.0a"] if "version" in kwargs: version = kwargs.get("version") if not version in known_versions: @@ -247,7 +246,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs) deltas[k - start][i][cnfg] = floats[1 + im - single] if "check_configs" in kwargs: - print("Chekcing for missing configs...") + print("Checking for missing configs...") che = kwargs.get("check_configs") if not (len(che) == len(idl)): raise Exception("check_configs has to be the same length as replica!") From 31c2ada963593d3f41c0ba25ed214212f7842391 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 3 Jan 2022 14:46:19 +0100 Subject: [PATCH 11/17] corrected small error in init --- pyerrors/input/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyerrors/input/__init__.py b/pyerrors/input/__init__.py index 23948b4c..2797841c 100644 --- a/pyerrors/input/__init__.py +++ b/pyerrors/input/__init__.py @@ -4,4 +4,3 @@ from . import json from . import misc from . import openQCD from . import sfcf -from . 
import utils From ebdc17aa664f8096d4372f09427546125ec47cd7 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 3 Jan 2022 21:34:04 +0100 Subject: [PATCH 12/17] implemented read_sfcf for append-mode output, bug fixes --- pyerrors/input/sfcf.py | 401 +++++++++++++++++++++++++---------------- 1 file changed, 249 insertions(+), 152 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 6382e5f0..5095e3ce 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -8,7 +8,7 @@ import numpy as np # Thinly-wrapped numpy from ..obs import Obs from . import utils -def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0", **kwargs): +def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0c", **kwargs): """Read sfcf c format from given folder structure. Parameters @@ -65,186 +65,283 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = #due to higher usage in current projects, compact file format is default compact = True + appended = False #get version string known_versions = ["0.0","1.0","2.0","1.0c","2.0c","1.0a","2.0a"] - if "version" in kwargs: - version = kwargs.get("version") - if not version in known_versions: - raise Exception("This version is not known!") - #if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c) - if(version[-1] == "c"): - compact = True - version = version[:-1] - else: - compact = False + + if not version in known_versions: + raise Exception("This version is not known!") + #if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c) + if(version[-1] == "c"): + appended = False + compact = True + version = version[:-1] + elif(version[-1] == "a"): + appended = True + compact = False + version = version[:-1] + else: + compact = False + appended = False read = 0 T = 0 start = 0 ls = [] - for (dirpath, dirnames, filenames) in os.walk(path): - ls.extend(dirnames) - break - if not ls: - raise Exception('Error, directory not found') - # Exclude folders with different names if "replica" in kwargs: ls = reps else: + for (dirpath, dirnames, filenames) in os.walk(path): + if not appended: + ls.extend(dirnames) + else: + ls.extend(filenames) + break + if not ls: + raise Exception('Error, directory not found') + # Exclude folders with different names for exc in ls: if not fnmatch.fnmatch(exc, prefix + '*'): ls = list(set(ls) - set([exc])) if len(ls) > 1: ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. 
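+    # in append mode every replicum contributes one file per correlator,
+    # named e.g. "data_r0.f_A" (illustrative); the replica count computed
+    # below is therefore the number of files divided by the number of
+    # distinct correlator-name suffixes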
- replica = len(ls) + if not appended: + replica = len(ls) + else: + replica = len([l.split(".")[-1] for l in ls])//len(set([l.split(".")[-1] for l in ls])) print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') - idl = [] if 'names' in kwargs: new_names = kwargs.get('names') if len(new_names)!=len(set(new_names)): - raise Exception("names are nor unique!") + raise Exception("names are not unique!") if len(new_names) != replica: raise Exception('Names does not have the required length', replica) else: # Adjust replica names to new bookmarking system + new_names = [] - for entry in ls: - try: - idx = entry.index('r') - except: - raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") - - if 'ens_name' in kwargs: - new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) - else: - new_names.append(entry[:idx] + '|' + entry[idx:]) - for i, item in enumerate(ls): - sub_ls = [] - if "files" in kwargs: - sub_ls = kwargs.get("files") - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + if not appended: + for entry in ls: + try: + idx = entry.index('r') + except: + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") + + if 'ens_name' in kwargs: + new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) + else: + new_names.append(entry[:idx] + '|' + entry[idx:]) else: - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): - if compact: - sub_ls.extend(filenames) - else: - sub_ls.extend(dirnames) - break - - #print(sub_ls) - for exc in sub_ls: - if compact: - if not fnmatch.fnmatch(exc, prefix + '*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) - else: - if not fnmatch.fnmatch(exc, 'cfg*'): - sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(x[3:])) - #print(sub_ls) - rep_idl = [] - no_cfg = len(sub_ls) - for cfg in sub_ls: - try: - if compact: - rep_idl.append(int(cfg.split("n")[-1])) - else: - rep_idl.append(int(cfg[3:])) - except: - raise Exception("Couldn't parse idl from directroy, problem with file "+cfg) - rep_idl.sort() - #maybe there is a better way to print the idls - print(item, ':', no_cfg, ' configurations') - idl.append(rep_idl) - #here we have found all the files we need to look into. - if i == 0: - #here, we want to find the place within the file, where the correlator we need is stored. 
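+                    # (os.walk yields the per-config files here for the
+                    # compact format and the per-config directories for the
+                    # non-compact one)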
- if compact: - #to do so, the pattern needed is put together from the input values - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) - if b2b: - pattern += '\nwf_2 ' + str(wf2) - #and the file is parsed through to find the pattern - with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: - content = file.read() - match = re.search(pattern, content) - if match: - #the start and end point of the correlator in quaetion is extracted for later use in the other files - start_read = content.count('\n', 0, match.start()) + 5 + b2b - end_match = re.search(r'\n\s*\n', content[match.start():]) - T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b - assert T > 0 - print(T, 'entries, starting to read in line', start_read) - else: - raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*.'+name): + ls = list(set(ls) - set([exc])) + ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + for entry in ls: + myentry = entry.removesuffix("."+name) + try: + idx = myentry.index('r') + except: + raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") + + if 'ens_name' in kwargs: + new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:]) + else: + new_names.append(myentry[:idx] + '|' + myentry[idx:]) + #print(new_names) + idl = [] + if not appended: + for i, item in enumerate(ls): + sub_ls = [] + if "files" in kwargs: + sub_ls = kwargs.get("files") + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) else: - #this part does the same as above, but for non-compactified versions of the files - with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: - for k, line in enumerate(fp): - if version == "0.0": - #check if this is really the right file by matchin pattern similar to above - pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) - #if b2b, a second wf is needed - if b2b: - pattern+=", wf_2 "+str(wf2) - qs = quarks.split(" ") - pattern+=" : "+qs[0]+" - "+qs[1] - #print(pattern) - if read == 1 and not line.strip() and k > start + 1: - break - if read == 1 and k >= start: - T += 1 - - if version == "0.0": - if pattern in line: - #print(line) - read = 1 - start = k+1 + for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + if compact: + sub_ls.extend(filenames) + else: + sub_ls.extend(dirnames) + break + + #print(sub_ls) + for exc in sub_ls: + if compact: + if not fnmatch.fnmatch(exc, prefix + '*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + else: + if not fnmatch.fnmatch(exc, 'cfg*'): + sub_ls = list(set(sub_ls) - set([exc])) + sub_ls.sort(key=lambda x: int(x[3:])) + #print(sub_ls) + rep_idl = [] + no_cfg = len(sub_ls) + for cfg in sub_ls: + try: + if compact: + rep_idl.append(int(cfg.split("n")[-1])) + else: + rep_idl.append(int(cfg[3:])) + except: + raise Exception("Couldn't parse idl from directroy, problem with file "+cfg) + rep_idl.sort() + #maybe there is a better way to print the idls + print(item, ':', no_cfg, ' configurations') + idl.append(rep_idl) + #here we have found all the files we need to look into. + if i == 0: + #here, we want to find the place within the file, where the correlator we need is stored. 
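The replicum label for an appended-mode file is derived by stripping the correlator suffix and splitting at the first 'r', as in the lines above; a sketch with an invented file name:

    entry, name = "test_r0.f_A", "f_A"
    myentry = entry.removesuffix("." + name)    # needs Python >= 3.9, replaced later in this series
    idx = myentry.index('r')
    print(myentry[:idx] + '|' + myentry[idx:])  # 'test_|r0'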
+ if compact: + #to do so, the pattern needed is put together from the input values + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) + #and the file is parsed through to find the pattern + with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: + content = file.read() + match = re.search(pattern, content) + if match: + #the start and end point of the correlator in quaetion is extracted for later use in the other files + start_read = content.count('\n', 0, match.start()) + 5 + b2b + end_match = re.search(r'\n\s*\n', content[match.start():]) + T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b + assert T > 0 + print(T, 'entries, starting to read in line', start_read) else: - if '[correlator]' in line: - read = 1 - start = k + 7 + b2b - T -= b2b - print(str(T)+" entries found.") - #we found where the correlator that is to be read is in the files - #after preparing the datastructure the correlators get parsed into... - deltas = [] - for j in range(T): - deltas.append([]) - - sublength = no_cfg - for j in range(T): - deltas[j].append(np.zeros(sublength)) - #... the actual parsing can start. we iterate through all measurement files in the path given... - if compact: - for cfg in range(no_cfg): - with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: - lines = fp.readlines() - #check, if the correlator is in fact printed completely - if(start_read + T>len(lines)): - raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") - #and start to read the correlator. - #the range here is chosen like this, since this allows for implementing a security check for every read correlator later... - for k in range(start_read - 6,start_read + T): - if k == start_read - 5 - b2b: - if lines[k].strip() != 'name ' + name: - raise Exception('Wrong format', sub_ls[cfg]) - if(k >= start_read and k < start_read + T): - floats = list(map(float, lines[k].split())) - deltas[k - start_read][i][cfg] = floats[-2:][im] - else: - for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/' + name) as fp: - #since the non-compatified files are typically not so long, we can iterate over the whole file. - #here one can also implement the chekc from above. 
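The compact-format pattern search can be traced on toy file content (invented, but laid out with the same five header lines that the offsets 5 and 4 assume):

    import re

    content = ("name f_A\nquarks lquark lquark\noffset 0\nwf 0\ncorr\n"
               "1 +0.1 0.0\n2 +0.2 0.0\n3 +0.3 0.0\n\n")
    pattern = 'name f_A\nquarks lquark lquark\noffset 0\nwf 0'
    b2b = 0
    match = re.search(pattern, content)
    start_read = content.count('\n', 0, match.start()) + 5 + b2b
    end_match = re.search(r'\n\s*\n', content[match.start():])
    T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
    print(T, 'entries, starting to read in line', start_read)  # 3 ... 5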
- for k, line in enumerate(fp): - if(k >= start and k < start + T): - floats = list(map(float, line.split())) + raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + else: + #this part does the same as above, but for non-compactified versions of the files + with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: + for k, line in enumerate(fp): if version == "0.0": - deltas[k-start][i][cnfg] = floats[im] - else: - deltas[k - start][i][cnfg] = floats[1 + im - single] + #check if this is really the right file by matchin pattern similar to above + pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) + #if b2b, a second wf is needed + if b2b: + pattern+=", wf_2 "+str(wf2) + qs = quarks.split(" ") + pattern+=" : "+qs[0]+" - "+qs[1] + #print(pattern) + if read == 1 and not line.strip() and k > start + 1: + break + if read == 1 and k >= start: + T += 1 + if version == "0.0": + if pattern in line: + #print(line) + read = 1 + start = k+1 + else: + if '[correlator]' in line: + read = 1 + start = k + 7 + b2b + T -= b2b + print(str(T)+" entries found.") + #we found where the correlator that is to be read is in the files + #after preparing the datastructure the correlators get parsed into... + deltas = [] + for j in range(T): + deltas.append([]) + + + for t in range(T): + deltas[t].append(np.zeros(no_cfg)) + #... the actual parsing can start. we iterate through all measurement files in the path given... + if compact: + for cfg in range(no_cfg): + with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: + lines = fp.readlines() + #check, if the correlator is in fact printed completely + if(start_read + T>len(lines)): + raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") + #and start to read the correlator. + #the range here is chosen like this, since this allows for implementing a security check for every read correlator later... + for k in range(start_read - 6,start_read + T): + if k == start_read - 5 - b2b: + if lines[k].strip() != 'name ' + name: + raise Exception('Wrong format', sub_ls[cfg]) + if(k >= start_read and k < start_read + T): + floats = list(map(float, lines[k].split())) + deltas[k - start_read][i][cfg] = floats[-2:][im] + else: + for cnfg, subitem in enumerate(sub_ls): + with open(path + '/' + item + '/' + subitem + '/' + name) as fp: + #since the non-compatified files are typically not so long, we can iterate over the whole file. + #here one can also implement the chekc from above. 
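Each compact data line ends in a real and an imaginary column, so floats[-2:][im] picks the requested part; the sample line here is invented:

    line = "12  +4.322914e-03  -1.141465e-05"  # t  Re  Im
    im = 0  # 0: real part, 1: imaginary part
    floats = list(map(float, line.split()))
    print(floats[-2:][im])  # 0.004322914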
+ for k, line in enumerate(fp): + if(k >= start and k < start + T): + floats = list(map(float, line.split())) + if version == "0.0": + deltas[k-start][i][cnfg] = floats[im] + else: + deltas[k - start][i][cnfg] = floats[1 + im - single] + + else: + for exc in ls: + if not fnmatch.fnmatch(exc, prefix + '*.'+name): + ls = list(set(ls) - set([exc])) + ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + #print(ls) + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + if b2b: + pattern += '\nwf_2 ' + str(wf2) + for rep,file in enumerate(ls): + rep_idl = [] + with open(path + '/' + file, 'r') as fp: + content = fp.readlines() + data_starts = [] + for l,line in enumerate(content): + if "[run]" in line: + data_starts.append(l) + if len(set([data_starts[i]-data_starts[i-1] for i in range(1,len(data_starts))])) > 1: + raise Exception ("Irregularities in file structure found, not all runs have the same output length") + #print(data_starts) + #first chunk of data + chunk = content[:data_starts[1]] + for l,line in enumerate(chunk): + if line.startswith("gauge_name"): + gauge_line = l + #meta_data["gauge_name"] = (line.strip()).split("/")[-1] + elif line.startswith("[correlator]"): + corr_line = l + found_pat = "" + for li in chunk[corr_line+1:corr_line+6+b2b]: + found_pat += li + if re.search(pattern,found_pat): + start_read = corr_line+7+b2b + T=len(chunk)-1-start_read + if rep == 0: + deltas = [] + for t in range(T): + deltas.append([]) + for t in range(T): + deltas[t].append(np.zeros(len(data_starts))) + #all other chunks should follow the same structure + for cnfg in range(len(data_starts)): + start = data_starts[cnfg] + stop = start+data_starts[1] + chunk = content[start:stop] + #meta_data = {} + + try: + rep_idl.append(int(chunk[gauge_line].split("n")[-1])) + except: + raise Exception("Couldn't parse idl from directroy, problem with chunk around line "+gauge_line) + + found_pat = "" + for li in chunk[corr_line+1:corr_line+6+b2b]: + found_pat += li + if re.search(pattern,found_pat): + #print("found pattern") + for t,line in enumerate(chunk[start_read:start_read+T]): + floats = list(map(float, line.split())) + deltas[t][rep][cnfg] = floats[-2:][im] + idl.append(rep_idl) + + #print(new_names) + #print(deltas) + #print(idl) if "check_configs" in kwargs: print("Checking for missing configs...") che = kwargs.get("check_configs") From 53f727092d46113e2d4a335bfdc85b5393227be3 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Mon, 10 Jan 2022 11:45:42 +0100 Subject: [PATCH 13/17] small bug fix enabling older versions of python --- pyerrors/input/sfcf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index 5095e3ce..af14041b 100644 --- a/pyerrors/input/sfcf.py +++ b/pyerrors/input/sfcf.py @@ -137,7 +137,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = ls = list(set(ls) - set([exc])) ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) for entry in ls: - myentry = entry.removesuffix("."+name) + myentry = entry[:-len(name)-1] + print(myentry) try: idx = myentry.index('r') except: From 302a7ae439b2ce93862201dfa4bc2437a2ec3af8 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 14 Jan 2022 16:00:40 +0100 Subject: [PATCH 14/17] flake8 compliance --- pyerrors/input/sfcf.py | 277 +++++++++++++++++++++++++---------------- 1 file changed, 168 insertions(+), 109 deletions(-) diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py index af14041b..c58c0dd2 
100644
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@@ -8,39 +8,55 @@ import numpy as np  # Thinly-wrapped numpy
 from ..obs import Obs
 from . import utils
 
-def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0c", **kwargs):
+
+def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
+              version="1.0c", **kwargs):
     """Read sfcf c format from given folder structure.
 
     Parameters
     ----------
     quarks: str
         Label of the quarks used in the sfcf input file. e.g. "quark quark"
-        for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file,
+        for version 0.0 this does NOT need to be given with the typical " - "
+        that is present in the output file,
         this is done automatically for this version
     noffset: int
         Offset of the source (only relevant when wavefunctions are used)
     wf: int
         ID of wave function
     wf2: int
-        ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
+        ID of the second wavefunction
+        (only relevant for boundary-to-boundary correlation functions)
     im: bool
-        if True, read imaginary instead of real part of the correlation function.
+        if True, read imaginary instead of real part
+        of the correlation function.
     b2b: bool
-        if True, read a time-dependent boundary-to-boundary correlation function
+        if True, read a time-dependent boundary-to-boundary
+        correlation function
     single: bool
-        if True, read time independent boundary to boundary correlation function
+        if True, read a time-independent boundary-to-boundary
+        correlation function
     names: list
-        Alternative labeling for replicas/ensembles. Has to have the appropriate length
+        Alternative labeling for replicas/ensembles.
+        Has to have the appropriate length
     ens_name : str
         replaces the name of the ensemble
     version: str
-        version of SFCF, with which the measurement was done. if the compact output option (-c) was spectified, append a c to the version (e.g. "1.0c")
+        version of SFCF with which the measurement was done.
+        if the compact output option (-c) was specified,
+        append a "c" to the version (e.g. "1.0c")
+        if the append output option (-a) was specified,
+        append an "a" to the version
     replica: list
         list of replica to be read, default is all
     files: list
-        list of files to be read per replica, default is all. for non-conpact ouztput format, hand the folders to be read here.
+        list of files to be read per replica, default is all.
+        for non-compact output format, hand the folders to be read here.
     check_configs:
-        list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
+        list of lists of expected configs, e.g. [range(1,1000)]
+        for one replicum with 1000 configs
+    TODO:
+    - what's going on with files here?
""" if kwargs.get('im'): im = 1 @@ -63,15 +79,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = if "files" in kwargs: files = kwargs.get("files") - #due to higher usage in current projects, compact file format is default + # due to higher usage in current projects, + # compact file format is default compact = True appended = False - #get version string - known_versions = ["0.0","1.0","2.0","1.0c","2.0c","1.0a","2.0a"] + # get version string + known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"] - if not version in known_versions: + if version not in known_versions: raise Exception("This version is not known!") - #if the letter c is appended to the version, the compact fileformat is used (former read_sfcf_c) + # if the letter c is appended to the version, + # the compact fileformat is used (former read_sfcf_c) if(version[-1] == "c"): appended = False compact = True @@ -103,15 +121,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = if not fnmatch.fnmatch(exc, prefix + '*'): ls = list(set(ls) - set([exc])) if len(ls) > 1: - ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc. + # New version, to cope with ids, etc. + ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) + if not appended: replica = len(ls) else: - replica = len([l.split(".")[-1] for l in ls])//len(set([l.split(".")[-1] for l in ls])) - print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica') + replica = len([file.split(".")[-1] for file in ls])\ + // len(set([file.split(".")[-1] for file in ls])) + print('Read', part, 'part of', name, 'from', prefix[:-1], + ',', replica, 'replica') if 'names' in kwargs: new_names = kwargs.get('names') - if len(new_names)!=len(set(new_names)): + if len(new_names) != len(set(new_names)): raise Exception("names are not unique!") if len(new_names) != replica: raise Exception('Names does not have the required length', replica) @@ -123,32 +145,36 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = for entry in ls: try: idx = entry.index('r') - except: - raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") - + except Exception: + raise Exception("Automatic recognition of replicum failed, \ + please enter the key word 'names'.") + if 'ens_name' in kwargs: - new_names.append(kwargs.get('ens_name') + '|' + entry[idx:]) + new_names.append(kwargs.get('ens_name') + '|' + + entry[idx:]) else: new_names.append(entry[:idx] + '|' + entry[idx:]) else: - + for exc in ls: if not fnmatch.fnmatch(exc, prefix + '*.'+name): ls = list(set(ls) - set([exc])) ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) for entry in ls: myentry = entry[:-len(name)-1] - print(myentry) + # print(myentry) try: idx = myentry.index('r') - except: - raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.") - + except Exception: + raise Exception("Automatic recognition of replicum failed, \ + please enter the key word 'names'.") + if 'ens_name' in kwargs: - new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:]) + new_names.append(kwargs.get('ens_name') + '|' + + myentry[idx:]) else: new_names.append(myentry[:idx] + '|' + myentry[idx:]) - #print(new_names) + # print(new_names) idl = [] if not appended: for i, item in enumerate(ls): @@ -157,24 +183,26 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = 
sub_ls = kwargs.get("files") sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) else: - for (dirpath, dirnames, filenames) in os.walk(path + '/' + item): + for (dirpath, dirnames, filenames) in \ + os.walk(path + '/' + item): if compact: sub_ls.extend(filenames) else: sub_ls.extend(dirnames) break - - #print(sub_ls) - for exc in sub_ls: + + # print(sub_ls) + for exc in sub_ls: if compact: if not fnmatch.fnmatch(exc, prefix + '*'): sub_ls = list(set(sub_ls) - set([exc])) - sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) + sub_ls.sort(key=lambda x: + int(re.findall(r'\d+', x)[-1])) else: if not fnmatch.fnmatch(exc, 'cfg*'): sub_ls = list(set(sub_ls) - set([exc])) sub_ls.sort(key=lambda x: int(x[3:])) - #print(sub_ls) + # print(sub_ls) rep_idl = [] no_cfg = len(sub_ls) for cfg in sub_ls: @@ -183,54 +211,73 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = rep_idl.append(int(cfg.split("n")[-1])) else: rep_idl.append(int(cfg[3:])) - except: - raise Exception("Couldn't parse idl from directroy, problem with file "+cfg) + except Exception: + raise Exception("Couldn't parse idl from directroy, \ + problem with file "+cfg) rep_idl.sort() - #maybe there is a better way to print the idls + # maybe there is a better way to print the idls print(item, ':', no_cfg, ' configurations') idl.append(rep_idl) - #here we have found all the files we need to look into. + # here we have found all the files we need to look into. if i == 0: - #here, we want to find the place within the file, where the correlator we need is stored. + # here, we want to find the place within the file, + # where the correlator we need is stored. if compact: - #to do so, the pattern needed is put together from the input values - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + # to do so, the pattern needed is put together + # from the input values + pattern = 'name ' + name + '\nquarks '\ + + quarks + '\noffset '\ + + str(noffset) + '\nwf '\ + + str(wf) if b2b: pattern += '\nwf_2 ' + str(wf2) - #and the file is parsed through to find the pattern - with open(path + '/' + item + '/' + sub_ls[0], 'r') as file: + # and the file is parsed through to find the pattern + with open(path + '/' + item + '/' + sub_ls[0], 'r') \ + as file: content = file.read() match = re.search(pattern, content) if match: - #the start and end point of the correlator in quaetion is extracted for later use in the other files - start_read = content.count('\n', 0, match.start()) + 5 + b2b - end_match = re.search(r'\n\s*\n', content[match.start():]) - T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b + # the start and end point of the correlator + # in quaetion is extracted for later use in + # the other files + start_read = content.count('\n', 0, match.start())\ + + 5 + b2b + end_match = re.search(r'\n\s*\n', + content[match.start():]) + T = content[match.start():]\ + .count('\n', 0, end_match.start()) - 4 - b2b assert T > 0 - print(T, 'entries, starting to read in line', start_read) + print(T, 'entries, starting to read in line', + start_read) else: - raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') + raise Exception('Correlator with pattern\n' + + pattern + '\nnot found.') else: - #this part does the same as above, but for non-compactified versions of the files - with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: + # this part does the same as above, + # but for non-compactified versions of the files + with 
open(path + '/' + item + '/' + sub_ls[0] + '/' + + name) as fp: for k, line in enumerate(fp): if version == "0.0": - #check if this is really the right file by matchin pattern similar to above - pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf) - #if b2b, a second wf is needed + # check if this is really the right file + # by matching pattern similar to above + pattern = "# "+name+" : offset "+str(noffset)\ + + ", wf "+str(wf) + # if b2b, a second wf is needed if b2b: - pattern+=", wf_2 "+str(wf2) + pattern += ", wf_2 "+str(wf2) qs = quarks.split(" ") - pattern+=" : "+qs[0]+" - "+qs[1] - #print(pattern) - if read == 1 and not line.strip() and k > start + 1: + pattern += " : " + qs[0]+" - " + qs[1] + # print(pattern) + if read == 1 and not line.strip() \ + and k > start + 1: break if read == 1 and k >= start: T += 1 if version == "0.0": if pattern in line: - #print(line) + # print(line) read = 1 start = k+1 else: @@ -239,121 +286,133 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = start = k + 7 + b2b T -= b2b print(str(T)+" entries found.") - #we found where the correlator that is to be read is in the files - #after preparing the datastructure the correlators get parsed into... + # we found where the correlator + # that is to be read is in the files + # after preparing the datastructure + # the correlators get parsed into... deltas = [] for j in range(T): deltas.append([]) - - + for t in range(T): deltas[t].append(np.zeros(no_cfg)) - #... the actual parsing can start. we iterate through all measurement files in the path given... + # ...the actual parsing can start. + # we iterate through all measurement files in the path given... if compact: for cfg in range(no_cfg): with open(path + '/' + item + '/' + sub_ls[cfg]) as fp: lines = fp.readlines() - #check, if the correlator is in fact printed completely - if(start_read + T>len(lines)): - raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?") - #and start to read the correlator. - #the range here is chosen like this, since this allows for implementing a security check for every read correlator later... - for k in range(start_read - 6,start_read + T): + # check, if the correlator is in fact + # printed completely + if(start_read + T > len(lines)): + raise Exception("EOF before end of correlator data! \ + Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" \ + is corrupted?") + # and start to read the correlator. + # the range here is chosen like this, + # since this allows for implementing + # a security check for every read correlator later... + for k in range(start_read - 6, start_read + T): if k == start_read - 5 - b2b: if lines[k].strip() != 'name ' + name: - raise Exception('Wrong format', sub_ls[cfg]) + raise Exception('Wrong format', + sub_ls[cfg]) if(k >= start_read and k < start_read + T): floats = list(map(float, lines[k].split())) - deltas[k - start_read][i][cfg] = floats[-2:][im] + deltas[k - start_read][i][cfg] = \ + floats[-2:][im] else: for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem + '/' + name) as fp: - #since the non-compatified files are typically not so long, we can iterate over the whole file. - #here one can also implement the chekc from above. + with open(path + '/' + item + '/' + subitem + + '/' + name) as fp: + # since the non-compatified files + # are typically not so long, + # we can iterate over the whole file. + # here one can also implement the chekc from above. 
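For version 0.0 the header pattern is assembled differently, with the quark pair joined by " - "; spelled out with invented parameters:

    name, quarks, noffset, wf, wf2 = "f_A", "lquark lquark", 0, 0, 0
    b2b = False
    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
    if b2b:
        pattern += ", wf_2 " + str(wf2)
    qs = quarks.split(" ")
    pattern += " : " + qs[0] + " - " + qs[1]
    print(pattern)  # '# f_A : offset 0, wf 0 : lquark - lquark'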
for k, line in enumerate(fp): if(k >= start and k < start + T): floats = list(map(float, line.split())) if version == "0.0": deltas[k-start][i][cnfg] = floats[im] else: - deltas[k - start][i][cnfg] = floats[1 + im - single] - + deltas[k - start][i][cnfg] = \ + floats[1 + im - single] + else: for exc in ls: if not fnmatch.fnmatch(exc, prefix + '*.'+name): ls = list(set(ls) - set([exc])) ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) - #print(ls) - pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) + # print(ls) + pattern = 'name ' + name + '\nquarks '\ + + quarks + '\noffset ' + str(noffset)\ + + '\nwf ' + str(wf) if b2b: pattern += '\nwf_2 ' + str(wf2) - for rep,file in enumerate(ls): + for rep, file in enumerate(ls): rep_idl = [] with open(path + '/' + file, 'r') as fp: content = fp.readlines() data_starts = [] - for l,line in enumerate(content): + for linenumber, line in enumerate(content): if "[run]" in line: - data_starts.append(l) - if len(set([data_starts[i]-data_starts[i-1] for i in range(1,len(data_starts))])) > 1: - raise Exception ("Irregularities in file structure found, not all runs have the same output length") - #print(data_starts) - #first chunk of data + data_starts.append(linenumber) + if len(set([data_starts[i]-data_starts[i-1] for i in + range(1, len(data_starts))])) > 1: + raise Exception("Irregularities in file structure found,\ + not all runs have the same output length") + # first chunk of data chunk = content[:data_starts[1]] - for l,line in enumerate(chunk): + for linenumber, line in enumerate(chunk): if line.startswith("gauge_name"): - gauge_line = l - #meta_data["gauge_name"] = (line.strip()).split("/")[-1] + gauge_line = linenumber elif line.startswith("[correlator]"): - corr_line = l + corr_line = linenumber found_pat = "" for li in chunk[corr_line+1:corr_line+6+b2b]: found_pat += li - if re.search(pattern,found_pat): + if re.search(pattern, found_pat): start_read = corr_line+7+b2b - T=len(chunk)-1-start_read + T = len(chunk)-1-start_read if rep == 0: deltas = [] for t in range(T): deltas.append([]) for t in range(T): deltas[t].append(np.zeros(len(data_starts))) - #all other chunks should follow the same structure + # all other chunks should follow the same structure for cnfg in range(len(data_starts)): start = data_starts[cnfg] stop = start+data_starts[1] chunk = content[start:stop] - #meta_data = {} - + # meta_data = {} try: rep_idl.append(int(chunk[gauge_line].split("n")[-1])) - except: - raise Exception("Couldn't parse idl from directroy, problem with chunk around line "+gauge_line) - + except Exception: + raise Exception("Couldn't parse idl from directroy, \ + problem with chunk around line "+gauge_line) + found_pat = "" for li in chunk[corr_line+1:corr_line+6+b2b]: found_pat += li - if re.search(pattern,found_pat): - #print("found pattern") - for t,line in enumerate(chunk[start_read:start_read+T]): + if re.search(pattern, found_pat): + for t, line in \ + enumerate(chunk[start_read:start_read+T]): floats = list(map(float, line.split())) deltas[t][rep][cnfg] = floats[-2:][im] idl.append(rep_idl) - #print(new_names) - #print(deltas) - #print(idl) if "check_configs" in kwargs: print("Checking for missing configs...") che = kwargs.get("check_configs") if not (len(che) == len(idl)): - raise Exception("check_configs has to be the same length as replica!") + raise Exception("check_configs has to be the same length\ + as replica!") for r in range(len(idl)): print("checking "+new_names[r]) utils.check_idl(idl[r], 
che[r]) print("Done") result = [] for t in range(T): - result.append(Obs(deltas[t], new_names, idl = idl)) + result.append(Obs(deltas[t], new_names, idl=idl)) return result - From 5f156e4821d95b1c0923555468086eafea59b230 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 14 Jan 2022 16:47:34 +0100 Subject: [PATCH 15/17] flake8 compliance openQCD.py --- pyerrors/input/openQCD.py | 232 ++++++++++++++++++++++---------------- pyerrors/input/sfcf.py | 19 ++-- 2 files changed, 144 insertions(+), 107 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 5c44fd2f..60ed64c7 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -8,7 +8,6 @@ import struct import numpy as np # Thinly-wrapped numpy from ..obs import Obs from ..fits import fit_lin -from . import utils def read_rwms(path, prefix, version='2.0', names=None, **kwargs): @@ -67,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): else: r_stop = [None] * replica - print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='') + print('Read reweighting factors from', prefix[:-1], ',', + replica, 'replica', end='') # Adjust replica names to new bookmarking system if names is None: @@ -75,7 +75,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for entry in ls: truncated_entry = entry.split('.')[0] idx = truncated_entry.index('r') - rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:]) + rep_names.append(truncated_entry[:idx] + '|' + + truncated_entry[idx:]) print_err = 0 if 'print_err' in kwargs: @@ -97,8 +98,13 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for k in range(nrw): deltas.append([]) else: - if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): # little weird if-clause due to the /2 operation needed. - raise Exception('Error: different number of reweighting factors for replicum', rep) + # little weird if-clause due to the /2 operation needed. 
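The "/2" that the comment above refers to: the halved comparison in the check below suggests that openQCD 2.0 stores twice the number of reweighting factors in this header field; a sketch with an invented header value:

    import struct

    version = '2.0'
    t = struct.pack('i', 6)        # invented header field, assumed to hold 2 * nrw
    nrw = struct.unpack('i', t)[0]
    if version == '2.0':
        nrw = nrw // 2             # assumed halving, matching the check below
    print(nrw)  # 3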
+            if ((nrw != struct.unpack('i', t)[0] and
+                 (not version == '2.0')) or
+                    (nrw != struct.unpack('i', t)[0] / 2 and
+                        version == '2.0')):
+                raise Exception('Error: different number of reweighting\
+                                factors for replicum', rep)
 
             for k in range(nrw):
                 tmp_array.append([])
@@ -109,7 +115,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                 for i in range(nrw):
                     t = fp.read(4)
                     nfct.append(struct.unpack('i', t)[0])
-                # print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
+                # print('nfct: ', nfct) # Hasenbusch factor,
+                # 1 for rat reweighting
             else:
                 for i in range(nrw):
                     nfct.append(1)
@@ -138,8 +145,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                         for j in range(tmpd['n'][0]):
                             tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j])))
                         if print_err:
-                            print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j]))))
-                            print('Sources:', np.exp(-np.asarray(tmp_rw[j])))
+                            print(config_no, i, j,
+                                  np.mean(np.exp(-np.asarray(tmp_rw[j]))),
+                                  np.std(np.exp(-np.asarray(tmp_rw[j]))))
+                            print('Sources:',
+                                  np.exp(-np.asarray(tmp_rw[j])))
                             print('Partial factor:', tmp_nfct)
                     elif version == '1.6' or version == '1.4':
                         tmp_nfct = 1.0
@@ -149,7 +159,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                         tmp_rw = struct.unpack('d' * nsrc[i], t)
                         tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
                         if print_err:
-                            print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
+                            print(config_no, i, j,
+                                  np.mean(np.exp(-np.asarray(tmp_rw))),
+                                  np.std(np.exp(-np.asarray(tmp_rw))))
                             print('Sources:', np.exp(-np.asarray(tmp_rw)))
                             print('Partial factor:', tmp_nfct)
                     tmp_array[i].append(tmp_nfct)
@@ -168,11 +180,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
     return result
 
 
-def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
+def extract_t0(path, prefix, dtr_read, xmin,
+               spatial_extent, fit_range=5, **kwargs):
     """Extract t0 from given .ms.dat files. Returns t0 as Obs.
 
-    It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
-    The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
+    It is assumed that all boundary effects have
+    sufficiently decayed at x0=xmin.
+    The data around the zero crossing of t^2<E> - 0.3
+    is fitted with a linear function
     from which the exact root is extracted.
 
    Only works with openQCD v 1.2.
@@ -183,14 +198,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
     prefix : str
         Ensemble prefix
     dtr_read : int
-        Determines how many trajectories should be skipped when reading the ms.dat files.
+        Determines how many trajectories should be skipped
+        when reading the ms.dat files.
         Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
     xmin : int
-        First timeslice where the boundary effects have sufficiently decayed.
+        First timeslice where the boundary
+        effects have sufficiently decayed.
     spatial_extent : int
         spatial extent of the lattice, required for normalization.
     fit_range : int
-        Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit. (Default: 5)
     r_start : list
         list which contains the first config to be read for each replicum.
r_stop: list @@ -276,7 +294,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar Ysum.append([]) for i, item in enumerate(Ysl): - Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)]) + Ysum[-1].append([np.mean(item[current + xmin: + current + tmax - xmin]) + for current in range(0, len(item), tmax)]) t2E_dict = {} for n in range(nn + 1): @@ -287,12 +307,16 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar samples[-1].append(cnfg[n]) samples[-1] = samples[-1][r_start[nrep]:r_stop[nrep]] new_obs = Obs(samples, [(w.split('.'))[0] for w in ls]) - t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3 + t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs \ + / (spatial_extent ** 3) - 0.3 - zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0) + zero_crossing = np.argmax(np.array( + [o.value for o in t2E_dict.values()]) > 0.0) - x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range] - y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range] + x = list(t2E_dict.keys())[zero_crossing - fit_range: + zero_crossing + fit_range] + y = list(t2E_dict.values())[zero_crossing - fit_range: + zero_crossing + fit_range] [o.gamma_method() for o in y] fit_result = fit_lin(x, y) @@ -348,7 +372,7 @@ def _read_array_openQCD2(fp): return {'d': d, 'n': n, 'size': size, 'arr': arr} -def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): +def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs): """Read qtop format from given folder structure. Parameters @@ -360,144 +384,150 @@ def read_qtop(path, prefix,c, dtr_cnfg = 1, version = "1.2",**kwargs): c: double Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L dtr_cnfg: int - (optional) parameter that specifies the number of trajectories between two configs. - if it is not set, the distance between two measurements in the file is assumed to be + (optional) parameter that specifies the number of trajectories + between two configs. + if it is not set, the distance between two measurements + in the file is assumed to be the distance between two configurations. steps: int (optional) (maybe only necessary for openQCD2.0) nt step size, guessed if not given version: str - version string of the openQCD (sfqcd) version used to create the ensemble + version string of the openQCD (sfqcd) version used to create + the ensemble L: int - spatial length of the lattice in L/a. HAS to be set if version != sfqcd, since openQCD does not provide this in the header + spatial length of the lattice in L/a. + HAS to be set if version != sfqcd, since openQCD does not provide + this in the header r_start: list - offset of the first ensemble, making it easier to match later on with other Obs + offset of the first ensemble, making it easier to match + later on with other Obs r_stop: list last configurations that need to be read (per replicum) - r_meas_start: list - offset of the first measured ensemble, if there is any files: list - specify the exact files that need to be read from path, pratical if e.g. only one replicum is needed + specify the exact files that need to be read + from path, pratical if e.g. only one replicum is needed names: list - Alternative labeling for replicas/ensembles. Has to have the appropriate length + Alternative labeling for replicas/ensembles. 
+ Has to have the appropriate length """ - #one could read L from the header in case of sfQCD - #c = 0.35 - known_versions = ["1.0","1.2","1.4","1.6","2.0", "sfqcd"] - - if not version in known_versions: + # one could read L from the header in case of sfQCD + # c = 0.35 + known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"] + + if version not in known_versions: raise Exception("Unknown openQCD version.") - target = 0 if "steps" in kwargs: steps = kwargs.get("steps") - - if 'target' in kwargs: - target = kwargs.get('target') if version == "sfqcd": if "L" in kwargs: - supposed_L = kwargs.get("L") + supposed_L = kwargs.get("L") else: - if not "L" in kwargs: - raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") + if "L" not in kwargs: + raise Exception("This version of openQCD needs you \ + to provide the spatial length of the \ + lattice as parameter 'L'.") else: L = kwargs.get("L") r_start = 1 - r_meas_start = 1 - if "r_meas_start" in kwargs: - r_meas_start = kwargs.get("r_meas_start") if "r_start" in kwargs: r_start = kwargs.get("r_start") if "r_stop" in kwargs: r_stop = kwargs.get("r_stop") - #if one wants to read specific files with this method... + # if one wants to read specific files with this method... if "files" in kwargs: files = kwargs.get("files") else: - #find files in path + # find files in path found = [] files = [] for (dirpath, dirnames, filenames) in os.walk(path+"/"): - #print(filenames) + # print(filenames) found.extend(filenames) break for f in found: if fnmatch.fnmatch(f, prefix+"*"+".ms.dat"): files.append(f) print(files) - #now that we found our files, we dechiffer them... + # now that we found our files, we dechiffer them... rep_names = [] - + deltas = [] idl = [] - for rep,file in enumerate(files): - + for rep, file in enumerate(files): with open(path+"/"+file, "rb") as fp: - #this, for now, is for version 1.2,1.4,1.6 and 2.0, but needs to be tested for the last 3, isncethe doc says its the same - #header + # header t = fp.read(12) header = struct.unpack(' 1: + range(1, len(data_starts))])) > 1: raise Exception("Irregularities in file structure found,\ - not all runs have the same output length") + not all runs have the same output length") # first chunk of data chunk = content[:data_starts[1]] for linenumber, line in enumerate(chunk): @@ -397,7 +398,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, found_pat += li if re.search(pattern, found_pat): for t, line in \ - enumerate(chunk[start_read:start_read+T]): + enumerate(chunk[start_read:start_read+T]): floats = list(map(float, line.split())) deltas[t][rep][cnfg] = floats[-2:][im] idl.append(rep_idl) From 62cb0ab1bbf9b587dc9335735a835c508562313f Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 14 Jan 2022 21:06:41 +0100 Subject: [PATCH 16/17] flake8 compliance without E501 --- pyerrors/input/openQCD.py | 56 +++++++------------ pyerrors/input/sfcf.py | 114 ++++++++++++++------------------------ 2 files changed, 62 insertions(+), 108 deletions(-) diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py index 60ed64c7..8caede78 100644 --- a/pyerrors/input/openQCD.py +++ b/pyerrors/input/openQCD.py @@ -75,8 +75,7 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): for entry in ls: truncated_entry = entry.split('.')[0] idx = truncated_entry.index('r') - rep_names.append(truncated_entry[:idx] + '|' - + truncated_entry[idx:]) + rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:]) 
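The bookmarking rename shown above, in compact form with an invented file name:

    entry = "ens1r2.ms1.dat"               # invented
    truncated_entry = entry.split('.')[0]  # 'ens1r2'
    idx = truncated_entry.index('r')
    print(truncated_entry[:idx] + '|' + truncated_entry[idx:])  # 'ens1|r2'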
print_err = 0 if 'print_err' in kwargs: @@ -99,12 +98,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs): deltas.append([]) else: # little weird if-clause due to the /2 operation needed. - if ((nrw != struct.unpack('i', t)[0] and - (not version == '2.0')) or - (nrw != struct.unpack('i', t)[0] / 2 and - version == '2.0')): - raise Exception('Error: different number of reweighting\ - factors for replicum', rep) + if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')): + raise Exception('Error: different number of reweighting factors for replicum', rep) for k in range(nrw): tmp_array.append([]) @@ -307,8 +302,7 @@ def extract_t0(path, prefix, dtr_read, xmin, samples[-1].append(cnfg[n]) samples[-1] = samples[-1][r_start[nrep]:r_stop[nrep]] new_obs = Obs(samples, [(w.split('.'))[0] for w in ls]) - t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs \ - / (spatial_extent ** 3) - 0.3 + t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3 zero_crossing = np.argmax(np.array( [o.value for o in t2E_dict.values()]) > 0.0) @@ -424,9 +418,7 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs): supposed_L = kwargs.get("L") else: if "L" not in kwargs: - raise Exception("This version of openQCD needs you \ - to provide the spatial length of the \ - lattice as parameter 'L'.") + raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.") else: L = kwargs.get("L") r_start = 1 @@ -441,12 +433,12 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs): # find files in path found = [] files = [] - for (dirpath, dirnames, filenames) in os.walk(path+"/"): + for (dirpath, dirnames, filenames) in os.walk(path + "/"): # print(filenames) found.extend(filenames) break for f in found: - if fnmatch.fnmatch(f, prefix+"*"+".ms.dat"): + if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"): files.append(f) print(files) # now that we found our files, we dechiffer them... 
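The file-discovery step that precedes the deciphering, as a standalone sketch (directory and prefix invented):

    import fnmatch
    import os

    path, prefix = "./data", "ens1"
    found = []
    for (dirpath, dirnames, filenames) in os.walk(path + "/"):
        found.extend(filenames)
        break  # only the top level, as in the patch
    files = [f for f in found if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat")]
    print(files)  # e.g. ['ens1r1.ms.dat', 'ens1r2.ms.dat']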
@@ -455,7 +447,7 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs): deltas = [] idl = [] for rep, file in enumerate(files): - with open(path+"/"+file, "rb") as fp: + with open(path + "/" + file, "rb") as fp: # header t = fp.read(12) header = struct.unpack(' 0 - print(T, 'entries, starting to read in line', - start_read) + print(T, 'entries, starting to read in line', start_read) else: - raise Exception('Correlator with pattern\n' - + pattern + '\nnot found.') + raise Exception('Correlator with pattern\n' + pattern + '\nnot found.') else: # this part does the same as above, # but for non-compactified versions of the files - with open(path + '/' + item + '/' + sub_ls[0] + '/' - + name) as fp: + with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp: for k, line in enumerate(fp): if version == "0.0": # check if this is really the right file # by matching pattern similar to above - pattern = "# "+name+" : offset "+str(noffset)\ - + ", wf "+str(wf) + pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf) # if b2b, a second wf is needed if b2b: - pattern += ", wf_2 "+str(wf2) + pattern += ", wf_2 " + str(wf2) qs = quarks.split(" ") - pattern += " : " + qs[0]+" - " + qs[1] + pattern += " : " + qs[0] + " - " + qs[1] # print(pattern) - if read == 1 and not line.strip() \ - and k > start + 1: + if read == 1 and not line.strip() and k > start + 1: break if read == 1 and k >= start: T += 1 @@ -277,13 +258,13 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, if pattern in line: # print(line) read = 1 - start = k+1 + start = k + 1 else: if '[correlator]' in line: read = 1 start = k + 7 + b2b T -= b2b - print(str(T)+" entries found.") + print(str(T) + " entries found.") # we found where the correlator # that is to be read is in the files # after preparing the datastructure @@ -303,9 +284,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, # check, if the correlator is in fact # printed completely if(start_read + T > len(lines)): - raise Exception("EOF before end of correlator data! \ - Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" \ - is corrupted?") + raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?") # and start to read the correlator. # the range here is chosen like this, # since this allows for implementing @@ -317,12 +296,10 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, sub_ls[cfg]) if(k >= start_read and k < start_read + T): floats = list(map(float, lines[k].split())) - deltas[k - start_read][i][cfg] = \ - floats[-2:][im] + deltas[k - start_read][i][cfg] = floats[-2:][im] else: for cnfg, subitem in enumerate(sub_ls): - with open(path + '/' + item + '/' + subitem - + '/' + name) as fp: + with open(path + '/' + item + '/' + subitem + '/' + name) as fp: # since the non-compatified files # are typically not so long, # we can iterate over the whole file. @@ -331,23 +308,20 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, if(k >= start and k < start + T): floats = list(map(float, line.split())) if version == "0.0": - deltas[k-start][i][cnfg] = floats[im] + deltas[k - start][i][cnfg] = floats[im] else: - deltas[k - start][i][cnfg] = \ - floats[1 + im - single] + deltas[k - start][i][cnfg] = floats[1 + im - single] else: if "files" in kwargs: ls = kwargs.get("files") else: for exc in ls: - if not fnmatch.fnmatch(exc, prefix + '*.'+name): + if not fnmatch.fnmatch(exc, prefix + '*.' 
+ name): ls = list(set(ls) - set([exc])) ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1])) # print(ls) - pattern = 'name ' + name + '\nquarks '\ - + quarks + '\noffset ' + str(noffset)\ - + '\nwf ' + str(wf) + pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf) if b2b: pattern += '\nwf_2 ' + str(wf2) for rep, file in enumerate(ls): @@ -358,10 +332,9 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, for linenumber, line in enumerate(content): if "[run]" in line: data_starts.append(linenumber) - if len(set([data_starts[i]-data_starts[i-1] for i in - range(1, len(data_starts))])) > 1: - raise Exception("Irregularities in file structure found,\ - not all runs have the same output length") + if len(set([data_starts[i] - data_starts[i - 1] for i in + range(1, len(data_starts))])) > 1: + raise Exception("Irregularities in file structure found, not all runs have the same output length") # first chunk of data chunk = content[:data_starts[1]] for linenumber, line in enumerate(chunk): @@ -370,11 +343,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, elif line.startswith("[correlator]"): corr_line = linenumber found_pat = "" - for li in chunk[corr_line+1:corr_line+6+b2b]: + for li in chunk[corr_line + 1:corr_line + 6 + b2b]: found_pat += li if re.search(pattern, found_pat): - start_read = corr_line+7+b2b - T = len(chunk)-1-start_read + start_read = corr_line + 7 + b2b + T = len(chunk) - 1 - start_read if rep == 0: deltas = [] for t in range(T): @@ -384,21 +357,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, # all other chunks should follow the same structure for cnfg in range(len(data_starts)): start = data_starts[cnfg] - stop = start+data_starts[1] + stop = start + data_starts[1] chunk = content[start:stop] # meta_data = {} try: rep_idl.append(int(chunk[gauge_line].split("n")[-1])) except Exception: - raise Exception("Couldn't parse idl from directroy, \ - problem with chunk around line "+gauge_line) + raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line) found_pat = "" - for li in chunk[corr_line+1:corr_line+6+b2b]: + for li in chunk[corr_line + 1:corr_line + 6 + b2b]: found_pat += li if re.search(pattern, found_pat): - for t, line in \ - enumerate(chunk[start_read:start_read+T]): + for t, line in enumerate(chunk[start_read:start_read + T]): floats = list(map(float, line.split())) deltas[t][rep][cnfg] = floats[-2:][im] idl.append(rep_idl) @@ -407,10 +378,9 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, print("Checking for missing configs...") che = kwargs.get("check_configs") if not (len(che) == len(idl)): - raise Exception("check_configs has to be the same length\ - as replica!") + raise Exception("check_configs has to be the same length as replica!") for r in range(len(idl)): - print("checking "+new_names[r]) + print("checking " + new_names[r]) utils.check_idl(idl[r], che[r]) print("Done") result = [] From dc6b844fa4a89b1e59fe73d3bbe01e5c4330c055 Mon Sep 17 00:00:00 2001 From: jkuhl-uni Date: Fri, 14 Jan 2022 21:12:08 +0100 Subject: [PATCH 17/17] flake8 compliance without E501of utils.py --- pyerrors/input/utils.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pyerrors/input/utils.py b/pyerrors/input/utils.py index a8dd026e..66b60f68 100644 --- a/pyerrors/input/utils.py +++ b/pyerrors/input/utils.py @@ -1,14 +1,15 @@ """Utilities for the input""" -def 
check_idl(idl,che):
+
+def check_idl(idl, che):
     missing = []
     for c in che:
-        if not c in idl:
+        if c not in idl:
             missing.append(c)
-    #print missing such that it can directly be parsed to slurm terminal
+    # print missing configs as a comma-separated string that can be passed straight to slurm
     if not (len(missing) == 0):
-        print(len(missing),"configs missing")
+        print(len(missing), "configs missing")
         miss_str = str(missing[0])
         for i in missing[1:]:
-            miss_str += ","+str(i)
+            miss_str += "," + str(i)
         print(miss_str)
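A hypothetical usage of the cleaned-up helper, with an invented config range:

    from pyerrors.input.utils import check_idl

    idl = [1, 2, 4, 5]             # configs that were actually read
    check_idl(idl, range(1, 6))    # prints '1 configs missing' and then '3'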