From b0bc9c8e47f80ec94ace13bc7622703283ad5b97 Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Sun, 16 Jan 2022 16:46:59 +0100
Subject: [PATCH 1/3] refactor: input test notebook removed

---
 tests/input_test.ipynb | 136 -----------------------------------------
 1 file changed, 136 deletions(-)
 delete mode 100644 tests/input_test.ipynb

diff --git a/tests/input_test.ipynb b/tests/input_test.ipynb
deleted file mode 100644
index f241304a..00000000
--- a/tests/input_test.ipynb
+++ /dev/null
@@ -1,136 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This file is used for testing some of the input methods."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os,sys,inspect\n",
-    "current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n",
-    "parent_dir = os.path.dirname(current_dir)\n",
-    "sys.path.insert(0, parent_dir) \n",
-    "\n",
-    "import pyerrors as pe\n",
-    "import pyerrors.input.openQCD as qcdin\n",
-    "import pyerrors.input.sfcf as sfin\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "First, we will have a look at the input method for the topological charge $Q_{top}$, which is measured by the program ms from the openQCD package. For now, this part still in the making and depends on an actual file. Later, this should be changed to a more efficient way of making a proper input file.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['T29L20k0.13719r2.ms.dat', 'T29L20k0.13719r3.ms.dat', 'T29L20k0.13719r1.ms.dat', 'T29L20k0.13719r4.ms.dat']\n",
-      "dn: 10\n",
-      "nn: 60\n",
-      "tmax: 30\n",
-      "eps: 0.02\n",
-      "max_t: 12.0\n",
-      "t_aim: 6.125\n",
-      "index_aim: 31\n",
-      "T29L20k0.13719r2\n",
-      "dn: 10\n",
-      "nn: 60\n",
-      "tmax: 30\n",
-      "eps: 0.02\n",
-      "max_t: 12.0\n",
-      "t_aim: 6.125\n",
-      "index_aim: 31\n",
-      "T29L20k0.13719r3\n",
-      "dn: 10\n",
-      "nn: 60\n",
-      "tmax: 30\n",
-      "eps: 0.02\n",
-      "max_t: 12.0\n",
-      "t_aim: 6.125\n",
-      "index_aim: 31\n",
-      "T29L20k0.13719r1\n",
-      "dn: 10\n",
-      "nn: 60\n",
-      "tmax: 30\n",
-      "eps: 0.02\n",
-      "max_t: 12.0\n",
-      "t_aim: 6.125\n",
-      "index_aim: 31\n",
-      "T29L20k0.13719r4\n"
-     ]
-    }
-   ],
-   "source": [
-    "r_qtop = qcdin.read_qtop(\"../../test_data\", prefix = \"T29L20k0.13719\",full = True, r_stop = [500,440,447,410])#, files = [\"T29L20k0.13719r1.ms.dat\"], )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'T29L20k0.13719|r1': 500, 'T29L20k0.13719|r2': 440, 'T29L20k0.13719|r3': 447, 'T29L20k0.13719|r4': 410}\n",
-      "0 0 0 0 0 0 0 0 0 0 [... one long line of rounded topological charge values, one per configuration, omitted ...] 0 0 0 "
-     ]
-    }
-   ],
-   "source": [
-    "print(r_qtop.shape)\n",
-    "#print(r_qtop.deltas['T29L20k0.13719|r1'])\n",
-    "for i in r_qtop.deltas['T29L20k0.13719|r2']:\n",
-    "    print(round(r_qtop.value + i), end =\" \")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "interpreter": {
-   "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
-  },
-  "kernelspec": {
-   "display_name": "Python 3.9.7 64-bit",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  },
-  "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

From f8cbaef626d9af7fe05458417259a78377e8a8c0 Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Sun, 16 Jan 2022 16:50:51 +0100
Subject: [PATCH 2/3] refactor: unnecessary comments in input/sfcf removed, typos corrected

---
 pyerrors/input/sfcf.py | 48 +++++------------------------------------
 1 file changed, 5 insertions(+), 43 deletions(-)

diff --git a/pyerrors/input/sfcf.py b/pyerrors/input/sfcf.py
index 2371d101..7e840dcd 100644
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@@ -43,15 +43,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
         replaces the name of the ensemble
     version: str
         version of SFCF, with which the measurement was done.
-        if the compact output option (-c) was spectified,
+        if the compact output option (-c) was specified,
         append a "c" to the version (e.g. "1.0c")
         if the append output option (-a) was specified,
-        append an "a" to the version
+        append an "a" to the version. Currently supported versions
+        are "0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a" and "2.0a".
     replica: list
         list of replica to be read, default is all
     files: list
         list of files to be read per replica, default is all.
-        for non-conpact ouztput format, hand the folders to be read here.
+        for non-compact output format, hand the folders to be read here.
     check_configs:
         list of list of supposed configs, eg. [range(1,1000)]
         for one replicum with 1000 configs
@@ -77,17 +78,12 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
     if "replica" in kwargs:
         reps = kwargs.get("replica")
 
-    # due to higher usage in current projects,
-    # compact file format is default
     compact = True
     appended = False
-    # get version string
     known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
 
     if version not in known_versions:
         raise Exception("This version is not known!")
-    # if the letter c is appended to the version,
-    # the compact fileformat is used (former read_sfcf_c)
     if(version[-1] == "c"):
         appended = False
         compact = True
@@ -119,7 +115,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
             if not fnmatch.fnmatch(exc, prefix + '*'):
                 ls = list(set(ls) - set([exc]))
         if len(ls) > 1:
-            # New version, to cope with ids, etc.
             ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
 
     if not appended:
@@ -135,8 +130,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
         if len(new_names) != replica:
             raise Exception('Names does not have the required length', replica)
     else:
-        # Adjust replica names to new bookmarking system
-
         new_names = []
         if not appended:
             for entry in ls:
@@ -157,7 +150,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
             ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
             for entry in ls:
                 myentry = entry[:-len(name) - 1]
-                # print(myentry)
                 try:
                     idx = myentry.index('r')
                 except Exception:
@@ -167,7 +159,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                     new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
                 else:
                     new_names.append(myentry[:idx] + '|' + myentry[idx:])
-    # print(new_names)
     idl = []
     if not appended:
         for i, item in enumerate(ls):
@@ -183,7 +174,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                     sub_ls.extend(dirnames)
                 break
-            # print(sub_ls)
             for exc in sub_ls:
                 if compact:
                     if not fnmatch.fnmatch(exc, prefix + '*'):
@@ -194,7 +184,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                     if not fnmatch.fnmatch(exc, 'cfg*'):
                         sub_ls = list(set(sub_ls) - set([exc]))
                 sub_ls.sort(key=lambda x: int(x[3:]))
-            # print(sub_ls)
             rep_idl = []
             no_cfg = len(sub_ls)
             for cfg in sub_ls:
@@ -206,26 +195,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                 except Exception:
                     raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
             rep_idl.sort()
-            # maybe there is a better way to print the idls
             print(item, ':', no_cfg, ' configurations')
             idl.append(rep_idl)
-            # here we have found all the files we need to look into.
         if i == 0:
-            # here, we want to find the place within the file,
-            # where the correlator we need is stored.
             if compact:
-                # to do so, the pattern needed is put together
-                # from the input values
                 pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
                 if b2b:
                     pattern += '\nwf_2 ' + str(wf2)
-                # and the file is parsed through to find the pattern
                 with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
                     content = file.read()
                     match = re.search(pattern, content)
                     if match:
                         # the start and end point of the correlator
-                        # in quaetion is extracted for later use in
+                        # in question is extracted for later use in
                         # the other files
                         start_read = content.count('\n', 0, match.start()) + 5 + b2b
                         end_match = re.search(r'\n\s*\n', content[match.start():])
@@ -248,7 +230,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                         pattern += ", wf_2 " + str(wf2)
                     qs = quarks.split(" ")
                     pattern += " : " + qs[0] + " - " + qs[1]
-                    # print(pattern)
                     if read == 1 and not line.strip() and k > start + 1:
                         break
                     if read == 1 and k >= start:
@@ -265,30 +246,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                         start = k + 7 + b2b
                     T -= b2b
                     print(str(T) + " entries found.")
-            # we found where the correlator
-            # that is to be read is in the files
-            # after preparing the datastructure
-            # the correlators get parsed into...
 
             deltas = []
             for j in range(T):
                 deltas.append([])
             for t in range(T):
                 deltas[t].append(np.zeros(no_cfg))
-            # ...the actual parsing can start.
             # we iterate through all measurement files in the path given...
             if compact:
                 for cfg in range(no_cfg):
                     with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                         lines = fp.readlines()
-                        # check, if the correlator is in fact
-                        # printed completely
                         if(start_read + T > len(lines)):
                             raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
-                        # and start to read the correlator.
-                        # the range here is chosen like this,
-                        # since this allows for implementing
-                        # a security check for every read correlator later...
                         for k in range(start_read - 6, start_read + T):
                             if k == start_read - 5 - b2b:
                                 if lines[k].strip() != 'name ' + name:
@@ -300,10 +270,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
             else:
                 for cnfg, subitem in enumerate(sub_ls):
                     with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
-                        # since the non-compatified files
-                        # are typically not so long,
-                        # we can iterate over the whole file.
-                        # here one can also implement the chekc from above.
                         for k, line in enumerate(fp):
                             if(k >= start and k < start + T):
                                 floats = list(map(float, line.split()))
@@ -320,7 +286,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
             if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                 ls = list(set(ls) - set([exc]))
         ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
-        # print(ls)
         pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
         if b2b:
             pattern += '\nwf_2 ' + str(wf2)
@@ -335,7 +300,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
         if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
             raise Exception("Irregularities in file structure found, not all runs have the same output length")
 
-        # first chunk of data
         chunk = content[:data_starts[1]]
         for linenumber, line in enumerate(chunk):
             if line.startswith("gauge_name"):
@@ -354,12 +318,10 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
             deltas.append([])
         for t in range(T):
             deltas[t].append(np.zeros(len(data_starts)))
-        # all other chunks should follow the same structure
        for cnfg in range(len(data_starts)):
            start = data_starts[cnfg]
            stop = start + data_starts[1]
            chunk = content[start:stop]
-            # meta_data = {}
            try:
                rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
            except Exception:

From 5993f1a4baf8fee3b37590d2aa32cc6d5d879637 Mon Sep 17 00:00:00 2001
From: Fabian Joswig
Date: Sun, 16 Jan 2022 16:53:57 +0100
Subject: [PATCH 3/3] refactor: comments removed in input/openQCD, typo corrected

---
 pyerrors/input/openQCD.py | 33 +--------------------------------
 1 file changed, 1 insertion(+), 32 deletions(-)

diff --git a/pyerrors/input/openQCD.py b/pyerrors/input/openQCD.py
index 8caede78..132be92e 100644
--- a/pyerrors/input/openQCD.py
+++ b/pyerrors/input/openQCD.py
@@ -1,6 +1,3 @@
-#!/usr/bin/env python
-# coding: utf-8
-
 import os
 import fnmatch
 import re
@@ -42,7 +39,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
     if 'files' in kwargs:
         ls = kwargs.get('files')
     else:
-        # Exclude files with different names
         for exc in ls:
             if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
                 ls = list(set(ls) - set([exc]))
@@ -69,7 +65,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
     print('Read reweighting factors from', prefix[:-1], ',',
           replica, 'replica', end='')
 
-    # Adjust replica names to new bookmarking system
     if names is None:
         rep_names = []
         for entry in ls:
@@ -88,7 +83,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
         tmp_array = []
         with open(path + '/' + ls[rep], 'rb') as fp:
 
-            # header
             t = fp.read(4)  # number of reweighting factors
             if rep == 0:
                 nrw = struct.unpack('i', t)[0]
@@ -97,7 +91,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                 for k in range(nrw):
                     deltas.append([])
             else:
-                # little weird if-clause due to the /2 operation needed.
                 if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or
                         (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
                     raise Exception('Error: different number of reweighting factors for replicum', rep)
@@ -110,8 +103,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                 for i in range(nrw):
                     t = fp.read(4)
                     nfct.append(struct.unpack('i', t)[0])
-                # print('nfct: ', nfct)  # Hasenbusch factor,
-                # 1 for rat reweighting
             else:
                 for i in range(nrw):
                     nfct.append(1)
@@ -124,7 +115,6 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                 if not struct.unpack('i', fp.read(4))[0] == 0:
                     print('something is wrong!')
 
-            # body
             while 0 < 1:
                 t = fp.read(4)
                 if len(t) < 4:
@@ -220,7 +210,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
     if not ls:
         raise Exception('Error, directory not found')
 
-    # Exclude files with different names
     for exc in ls:
         if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
             ls = list(set(ls) - set([exc]))
@@ -232,7 +221,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
         r_start = kwargs.get('r_start')
         if len(r_start) != replica:
             raise Exception('r_start does not match number of replicas')
-        # Adjust Configuration numbering to python index
         r_start = [o - 1 if o else None for o in r_start]
     else:
         r_start = [None] * replica
@@ -251,7 +239,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
     for rep in range(replica):
         with open(path + '/' + ls[rep], 'rb') as fp:
 
-            # Read header
             t = fp.read(12)
             header = struct.unpack('iii', t)
             if rep == 0:
@@ -270,7 +257,6 @@ def extract_t0(path, prefix, dtr_read, xmin,
 
             Ysl = []
 
-            # Read body
             while 0 < 1:
                 t = fp.read(4)
                 if(len(t) < 4):
@@ -334,12 +320,6 @@ def _parse_array_openQCD2(d, n, size, wa, quadrupel=False):
     return arr
 
 
-# mimic the read_array routine of openQCD-2.0.
-# fp is the opened file handle
-# returns the dict array
-# at this point we only parse a 2d array
-# d = 2
-# n = [nfct[irw], 2*nsrc[irw]]
 def _read_array_openQCD2(fp):
     t = fp.read(4)
     d = struct.unpack('i', t)[0]
@@ -400,13 +380,11 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
         last configurations that need to be read (per replicum)
     files: list
         specify the exact files that need to be read
-        from path, pratical if e.g. only one replicum is needed
+        from path, practical if e.g. only one replicum is needed
     names: list
         Alternative labeling for replicas/ensembles.
         Has to have the appropriate length
     """
-    # one could read L from the header in case of sfQCD
-    # c = 0.35
     known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
 
     if version not in known_versions:
@@ -426,11 +404,9 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
         r_start = kwargs.get("r_start")
     if "r_stop" in kwargs:
         r_stop = kwargs.get("r_stop")
-    # if one wants to read specific files with this method...
     if "files" in kwargs:
         files = kwargs.get("files")
     else:
-        # find files in path
         found = []
         files = []
         for (dirpath, dirnames, filenames) in os.walk(path + "/"):
@@ -441,14 +417,12 @@ def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
             if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
                 files.append(f)
         print(files)
-    # now that we found our files, we dechiffer them...
     rep_names = []
     deltas = []
     idl = []
 
     for rep, file in enumerate(files):
         with open(path + "/" + file, "rb") as fp:
 
-            # header
             t = fp.read(12)
             header = struct.unpack('