Merge pull request #32 from jkuhl-uni/feature/input_2.0

Feature/input 2.0
2025-08-23 01:24:41 +02:00 · 2022-01-16 16:43:55 +01:00 · 2022-01-16 16:43:55 +01:00 · c76e8b8bad
commit c76e8b8bad
parent 1765d4e6b6 dc6b844fa4
4 changed files with 777 additions and 270 deletions
--- a/pyerrors/input/openQCD.py
+++ b/pyerrors/input/openQCD.py
@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):

    if not ls:
        raise Exception('Error, directory not found')
-
-    # Exclude files with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
+    if 'files' in kwargs:
+        ls = kwargs.get('files')
+    else:
+        # Exclude files with different names
+        for exc in ls:
+            if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
+                ls = list(set(ls) - set([exc]))
+        if len(ls) > 1:
+            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)

    if 'r_start' in kwargs:
@ -64,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
    else:
        r_stop = [None] * replica

-    print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')
+    print('Read reweighting factors from', prefix[:-1], ',',
+          replica, 'replica', end='')

    # Adjust replica names to new bookmarking system
    if names is None:
@ -94,7 +97,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                for k in range(nrw):
                    deltas.append([])
            else:
-                if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):  # little weird if-clause due to the /2 operation needed.
+                # little weird if-clause due to the /2 operation needed.
+                if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
                    raise Exception('Error: different number of reweighting factors for replicum', rep)

            for k in range(nrw):
@ -106,7 +110,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                for i in range(nrw):
                    t = fp.read(4)
                    nfct.append(struct.unpack('i', t)[0])
-                # print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
+                # print('nfct: ', nfct) # Hasenbusch factor,
+                # 1 for rat reweighting
            else:
                for i in range(nrw):
                    nfct.append(1)
@ -135,8 +140,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                        for j in range(tmpd['n'][0]):
                            tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j])))
                            if print_err:
-                                print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j]))))
-                                print('Sources:', np.exp(-np.asarray(tmp_rw[j])))
+                                print(config_no, i, j,
+                                      np.mean(np.exp(-np.asarray(tmp_rw[j]))),
+                                      np.std(np.exp(-np.asarray(tmp_rw[j]))))
+                                print('Sources:',
+                                      np.exp(-np.asarray(tmp_rw[j])))
                                print('Partial factor:', tmp_nfct)
                    elif version == '1.6' or version == '1.4':
                        tmp_nfct = 1.0
@ -146,7 +154,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                            tmp_rw = struct.unpack('d' * nsrc[i], t)
                            tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
                            if print_err:
-                                print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
+                                print(config_no, i, j,
+                                      np.mean(np.exp(-np.asarray(tmp_rw))),
+                                      np.std(np.exp(-np.asarray(tmp_rw))))
                                print('Sources:', np.exp(-np.asarray(tmp_rw)))
                                print('Partial factor:', tmp_nfct)
                    tmp_array[i].append(tmp_nfct)
@ -165,11 +175,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
    return result


-def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
+def extract_t0(path, prefix, dtr_read, xmin,
+               spatial_extent, fit_range=5, **kwargs):
    """Extract t0 from given .ms.dat files. Returns t0 as Obs.

-    It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
-    The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
+    It is assumed that all boundary effects have
+    sufficiently decayed at x0=xmin.
+    The data around the zero crossing of t^2<E> - 0.3
+    is fitted with a linear function
    from which the exact root is extracted.
    Only works with openQCD v 1.2.

@ -180,14 +193,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
    prefix : str
        Ensemble prefix
    dtr_read : int
-        Determines how many trajectories should be skipped when reading the ms.dat files.
+        Determines how many trajectories should be skipped
+        when reading the ms.dat files.
        Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
    xmin : int
-        First timeslice where the boundary effects have sufficiently decayed.
+        First timeslice where the boundary
+        effects have sufficiently decayed.
    spatial_extent : int
        spatial extent of the lattice, required for normalization.
    fit_range : int
-        Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit. (Default: 5)
    r_start : list
        list which contains the first config to be read for each replicum.
    r_stop: list
@ -273,7 +289,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar

        Ysum.append([])
        for i, item in enumerate(Ysl):
-            Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])
+            Ysum[-1].append([np.mean(item[current + xmin:
+                             current + tmax - xmin])
+                            for current in range(0, len(item), tmax)])

    t2E_dict = {}
    for n in range(nn + 1):
@ -286,10 +304,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
        new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
        t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3

-    zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0)
+    zero_crossing = np.argmax(np.array(
+        [o.value for o in t2E_dict.values()]) > 0.0)

-    x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range]
-    y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range]
+    x = list(t2E_dict.keys())[zero_crossing - fit_range:
+                              zero_crossing + fit_range]
+    y = list(t2E_dict.values())[zero_crossing - fit_range:
+                                zero_crossing + fit_range]
    [o.gamma_method() for o in y]

    fit_result = fit_lin(x, y)
@ -343,3 +364,243 @@ def _read_array_openQCD2(fp):

    arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True)
    return {'d': d, 'n': n, 'size': size, 'arr': arr}
+
+
+def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
+    """Read qtop format from given folder structure.
+
+    Parameters
+    ----------
+    path:
+        path of the measurement files
+    prefix:
+        prefix of the measurement files, e.g. <prefix>_id0_r0.ms.dat
+    c: double
+        Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L
+    dtr_cnfg: int
+        (optional) parameter that specifies the number of trajectories
+        between two configs.
+        if it is not set, the distance between two measurements
+        in the file is assumed to be
+        the distance between two configurations.
+    steps: int
+        (optional) (maybe only necessary for openQCD2.0)
+        nt step size, guessed if not given
+    version: str
+        version string of the openQCD (sfqcd) version used to create
+        the ensemble
+    L: int
+        spatial length of the lattice in L/a.
+        HAS to be set if version != sfqcd, since openQCD does not provide
+        this in the header
+    r_start: list
+        offset of the first ensemble, making it easier to match
+        later on with other Obs
+    r_stop: list
+        last configurations that need to be read (per replicum)
+    files: list
+        specify the exact files that need to be read
+        from path, pratical if e.g. only one replicum is needed
+    names: list
+        Alternative labeling for replicas/ensembles.
+        Has to have the appropriate length
+    """
+    # one could read L from the header in case of sfQCD
+    # c = 0.35
+    known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
+
+    if version not in known_versions:
+        raise Exception("Unknown openQCD version.")
+    if "steps" in kwargs:
+        steps = kwargs.get("steps")
+    if version == "sfqcd":
+        if "L" in kwargs:
+            supposed_L = kwargs.get("L")
+    else:
+        if "L" not in kwargs:
+            raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.")
+        else:
+            L = kwargs.get("L")
+    r_start = 1
+    if "r_start" in kwargs:
+        r_start = kwargs.get("r_start")
+    if "r_stop" in kwargs:
+        r_stop = kwargs.get("r_stop")
+    # if one wants to read specific files with this method...
+    if "files" in kwargs:
+        files = kwargs.get("files")
+    else:
+        # find files in path
+        found = []
+        files = []
+        for (dirpath, dirnames, filenames) in os.walk(path + "/"):
+            # print(filenames)
+            found.extend(filenames)
+            break
+        for f in found:
+            if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
+                files.append(f)
+        print(files)
+    # now that we found our files, we dechiffer them...
+    rep_names = []
+
+    deltas = []
+    idl = []
+    for rep, file in enumerate(files):
+        with open(path + "/" + file, "rb") as fp:
+            # header
+            t = fp.read(12)
+            header = struct.unpack('<iii', t)
+            # step size in integration steps "dnms"
+            dn = header[0]
+            # number of measurements, so "ntot"/dn
+            nn = header[1]
+            # lattice T/a
+            tmax = header[2]
+            if version == "sfqcd":
+                t = fp.read(12)
+                Ls = struct.unpack('<iii', t)
+                if(Ls[0] == Ls[1] and Ls[1] == Ls[2]):
+                    L = Ls[0]
+                    if not (supposed_L == L):
+                        raise Exception("It seems the length given in the header and by you contradict each other")
+                else:
+                    raise Exception("Found more than one spatial length in header!")
+
+            print('dnms:', dn)
+            print('nn:', nn)
+            print('tmax:', tmax)
+            t = fp.read(8)
+            eps = struct.unpack('d', t)[0]
+            print('eps:', eps)
+
+            Q = []
+            ncs = []
+            while 0 < 1:
+                # int nt
+                t = fp.read(4)
+                if(len(t) < 4):
+                    break
+                ncs.append(struct.unpack('i', t)[0])
+                # Wsl
+                t = fp.read(8 * tmax * (nn + 1))
+                # Ysl
+                t = fp.read(8 * tmax * (nn + 1))
+                # Qsl, which is asked for in this method
+                t = fp.read(8 * tmax * (nn + 1))
+                # unpack the array of Qtops,
+                # on each timeslice t=0,...,tmax-1 and the
+                # measurement number in = 0...nn (see README.qcd1)
+                tmpd = struct.unpack('d' * tmax * (nn + 1), t)
+                Q.append(tmpd)
+
+        if not len(set([ncs[i] - ncs[i - 1] for i in range(1, len(ncs))])):
+            raise Exception("Irregularities in stepsize found")
+        else:
+            if 'steps' in kwargs:
+                if steps != ncs[1] - ncs[0]:
+                    raise Exception("steps and the found stepsize are not the same")
+            else:
+                steps = ncs[1] - ncs[0]
+
+        print(len(Q))
+        print('max_t:', dn * (nn) * eps)
+
+        t_aim = (c * L) ** 2 / 8
+
+        print('t_aim:', t_aim)
+        index_aim = round(t_aim / eps / dn)
+        print('index_aim:', index_aim)
+
+        Q_sum = []
+        for i, item in enumerate(Q):
+            Q_sum.append([sum(item[current:current + tmax])
+                         for current in range(0, len(item), tmax)])
+        print(len(Q_sum))
+        print(len(Q_sum[0]))
+        Q_round = []
+        for i in range(len(Q) // dtr_cnfg):
+            Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim]))
+        if len(Q_round) != len(ncs) // dtr_cnfg:
+            raise Exception("qtops and ncs dont have the same length")
+
+        # replica = len(files)
+
+        truncated_file = file[:-7]
+        print(truncated_file)
+        idl_start = 1
+
+        if "r_start" in kwargs:
+            Q_round = Q_round[r_start[rep]:]
+            idl_start = r_start[rep]
+        if "r_stop" in kwargs:
+            Q_round = Q_round[:r_stop[rep]]
+        idl_stop = idl_start + len(Q_round)
+        # keyword "names" prevails over "ens_name"
+        if "names" not in kwargs:
+            try:
+                idx = truncated_file.index('r')
+            except Exception:
+                if "names" not in kwargs:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
+            if "ens_name" in kwargs:
+                ens_name = kwargs.get("ens_name")
+            else:
+                ens_name = truncated_file[:idx]
+            rep_names.append(ens_name + '|' + truncated_file[idx:])
+        else:
+            names = kwargs.get("names")
+            rep_names = names
+        deltas.append(np.array(Q_round))
+        idl.append(range(idl_start, idl_stop))
+    # print(idl)
+    result = Obs(deltas, rep_names, idl=idl)
+    return result
+
+
+def read_qtop_sector(target=0, **kwargs):
+    """target: int
+            specifies the topological sector to be reweighted to (default 0)
+        q_top: Obs
+        alternatively takes args of read_qtop method as kwargs
+    """
+    if "q_top" in kwargs:
+        qtop = kwargs.get("q_top")
+    else:
+        if "path" in kwargs:
+            path = kwargs.get("path")
+            del kwargs["path"]
+        else:
+            raise Exception("If you are not providing q_top, please provide path")
+        if "prefix" in kwargs:
+            prefix = kwargs.get("prefix")
+            del kwargs["prefix"]
+        else:
+            raise Exception("If you are not providing q_top, please provide prefix")
+        if "c" in kwargs:
+            c = kwargs.get("c")
+            del kwargs["c"]
+        else:
+            raise Exception("If you are not providing q_top, please provide c")
+        if "version" in kwargs:
+            version = kwargs.get("version")
+            del kwargs["version"]
+        else:
+            version = "1.2"
+        if "dtr_cnfg" in kwargs:
+            dtr_cnfg = kwargs.get("dtr_cnfg")
+            del kwargs["dtr_cnfg"]
+        else:
+            dtr_cnfg = 1
+        qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
+                         version=version, **kwargs)
+    # unpack to original values, project onto target sector
+    names = qtop.names
+    print(names)
+    print(qtop.deltas.keys())
+    proj_qtop = []
+    for n in qtop.deltas:
+        proj_qtop.append(np.array([1 if int(qtop.value + q) == target else 0 for q in qtop.deltas[n]]))
+
+    result = Obs(proj_qtop, qtop.names)
+    return result
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@ -6,17 +6,57 @@ import fnmatch
 import re
 import numpy as np  # Thinly-wrapped numpy
 from ..obs import Obs
+from . import utils


-def read_sfcf(path, prefix, name, **kwargs):
-    """Read sfcf C format from given folder structure.
+def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
+              version="1.0c", **kwargs):
+    """Read sfcf c format from given folder structure.

    Parameters
    ----------
-    im -- if True, read imaginary instead of real part of the correlation function.
-    single -- if True, read a boundary-to-boundary correlation function with a single value
-    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
-    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
+    quarks: str
+        Label of the quarks used in the sfcf input file. e.g. "quark quark"
+        for version 0.0 this does NOT need to be given with the typical " - "
+        that is present in the output file,
+        this is done automatically for this version
+    noffset: int
+        Offset of the source (only relevant when wavefunctions are used)
+    wf: int
+        ID of wave function
+    wf2: int
+        ID of the second wavefunction
+        (only relevant for boundary-to-boundary correlation functions)
+    im: bool
+        if True, read imaginary instead of real part
+        of the correlation function.
+    b2b: bool
+        if True, read a time-dependent boundary-to-boundary
+        correlation function
+    single: bool
+        if True, read time independent boundary to boundary
+        correlation function
+    names: list
+        Alternative labeling for replicas/ensembles.
+        Has to have the appropriate length
+    ens_name : str
+        replaces the name of the ensemble
+    version: str
+        version of SFCF, with which the measurement was done.
+        if the compact output option (-c) was spectified,
+        append a "c" to the version (e.g. "1.0c")
+        if the append output option (-a) was specified,
+        append an "a" to the version
+    replica: list
+        list of replica to be read, default is all
+    files: list
+        list of files to be read per replica, default is all.
+        for non-conpact ouztput format, hand the folders to be read here.
+    check_configs:
+        list of list of supposed configs, eg. [range(1,1000)]
+        for one replicum with 1000 configs
+    TODO:
+    - whats going on with files here?
    """
    if kwargs.get('im'):
        im = 1
@ -29,266 +69,321 @@ def read_sfcf(path, prefix, name, **kwargs):
        b2b = 1
        single = 1
    else:
-        b2b = 0
+        if kwargs.get('b2b'):
+            b2b = 1
+        else:
+            b2b = 0
        single = 0
+    if "replica" in kwargs:
+        reps = kwargs.get("replica")

-    if kwargs.get('b2b'):
-        b2b = 1
+    # due to higher usage in current projects,
+    # compact file format is default
+    compact = True
+    appended = False
+    # get version string
+    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]

+    if version not in known_versions:
+        raise Exception("This version is not known!")
+    # if the letter c is appended to the version,
+    # the compact fileformat is used (former read_sfcf_c)
+    if(version[-1] == "c"):
+        appended = False
+        compact = True
+        version = version[:-1]
+    elif(version[-1] == "a"):
+        appended = True
+        compact = False
+        version = version[:-1]
+    else:
+        compact = False
+        appended = False
    read = 0
    T = 0
    start = 0
    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(dirnames)
-        break
-    if not ls:
-        raise Exception('Error, directory not found')
-    for exc in ls:
-        if fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set(exc))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
-    replica = len(ls)
-    print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
-    if 'names' in kwargs:
-        new_names = kwargs.get('names')
-        if len(new_names) != replica:
-            raise Exception('Names does not have the required length', replica)
+    if "replica" in kwargs:
+        ls = reps
    else:
-        # Adjust replica names to new bookmarking system
-        new_names = []
-        for entry in ls:
-            idx = entry.index('r')
-            new_names.append(entry[:idx] + '|' + entry[idx:])
-
-    print(replica, 'replica')
-    for i, item in enumerate(ls):
-        print(item)
-        sub_ls = []
-        for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
-            sub_ls.extend(dirnames)
-            break
-        for exc in sub_ls:
-            if fnmatch.fnmatch(exc, 'cfg*'):
-                sub_ls = list(set(sub_ls) - set(exc))
-        sub_ls.sort(key=lambda x: int(x[3:]))
-        no_cfg = len(sub_ls)
-        print(no_cfg, 'configurations')
-
-        if i == 0:
-            with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
-                for k, line in enumerate(fp):
-                    if read == 1 and not line.strip() and k > start + 1:
-                        break
-                    if read == 1 and k >= start:
-                        T += 1
-                    if '[correlator]' in line:
-                        read = 1
-                        start = k + 7 + b2b
-                        T -= b2b
-
-            deltas = []
-            for j in range(T):
-                deltas.append([])
-
-        sublength = len(sub_ls)
-        for j in range(T):
-            deltas[j].append(np.zeros(sublength))
-
-        for cnfg, subitem in enumerate(sub_ls):
-            with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
-                for k, line in enumerate(fp):
-                    if(k >= start and k < start + T):
-                        floats = list(map(float, line.split()))
-                        deltas[k - start][i][cnfg] = floats[1 + im - single]
-
-    result = []
-    for t in range(T):
-        result.append(Obs(deltas[t], new_names))
-
-    return result
-
-
-def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
-    """Read sfcf c format from given folder structure.
-
-    Parameters
-    ----------
-    quarks -- Label of the quarks used in the sfcf input file
-    noffset -- Offset of the source (only relevant when wavefunctions are used)
-    wf -- ID of wave function
-    wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
-    im -- if True, read imaginary instead of real part of the correlation function.
-    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
-    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
-    ens_name : str
-        replaces the name of the ensemble
-    """
-
-    if kwargs.get('im'):
-        im = 1
-        part = 'imaginary'
-    else:
-        im = 0
-        part = 'real'
-
-    if kwargs.get('b2b'):
-        b2b = 1
-    else:
-        b2b = 0
-
-    T = 0
-    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(dirnames)
-        break
-    if not ls:
-        raise Exception('Error, directory not found')
-    # Exclude folders with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
-    replica = len(ls)
-    if 'names' in kwargs:
-        new_names = kwargs.get('names')
-        if len(new_names) != replica:
-            raise Exception('Names does not have the required length', replica)
-    else:
-        # Adjust replica names to new bookmarking system
-        new_names = []
-        for entry in ls:
-            idx = entry.index('r')
-            if 'ens_name' in kwargs:
-                new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
+        for (dirpath, dirnames, filenames) in os.walk(path):
+            if not appended:
+                ls.extend(dirnames)
            else:
-                new_names.append(entry[:idx] + '|' + entry[idx:])
-
-    print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
-    for i, item in enumerate(ls):
-        sub_ls = []
-        for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
-            sub_ls.extend(filenames)
+                ls.extend(filenames)
            break
-        for exc in sub_ls:
+        if not ls:
+            raise Exception('Error, directory not found')
+        # Exclude folders with different names
+        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*'):
-                sub_ls = list(set(sub_ls) - set([exc]))
-        sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+                ls = list(set(ls) - set([exc]))
+    if len(ls) > 1:
+        # New version, to cope with ids, etc.
+        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))

-        first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
+    if not appended:
+        replica = len(ls)
+    else:
+        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
+    print('Read', part, 'part of', name, 'from', prefix[:-1],
+          ',', replica, 'replica')
+    if 'names' in kwargs:
+        new_names = kwargs.get('names')
+        if len(new_names) != len(set(new_names)):
+            raise Exception("names are not unique!")
+        if len(new_names) != replica:
+            raise Exception('Names does not have the required length', replica)
+    else:
+        # Adjust replica names to new bookmarking system

-        last_cfg = len(sub_ls) + first_cfg - 1
+        new_names = []
+        if not appended:
+            for entry in ls:
+                try:
+                    idx = entry.index('r')
+                except Exception:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

-        for cfg in range(1, len(sub_ls)):
-            if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
-                last_cfg = cfg + first_cfg - 1
-                break
-
-        no_cfg = last_cfg - first_cfg + 1
-        print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
-
-        if i == 0:
-            pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
-            if b2b:
-                pattern += '\nwf_2      ' + str(wf2)
-
-            with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
-                content = file.read()
-                match = re.search(pattern, content)
-                if match:
-                    start_read = content.count('\n', 0, match.start()) + 5 + b2b
-                    end_match = re.search(r'\n\s*\n', content[match.start():])
-                    T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
-                    assert T > 0
-                    print(T, 'entries, starting to read in line', start_read)
+                if 'ens_name' in kwargs:
+                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
                else:
-                    raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
+                    new_names.append(entry[:idx] + '|' + entry[idx:])
+        else:

-            deltas = []
-            for j in range(T):
-                deltas.append([])
+            for exc in ls:
+                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
+                    ls = list(set(ls) - set([exc]))
+            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+            for entry in ls:
+                myentry = entry[:-len(name) - 1]
+                # print(myentry)
+                try:
+                    idx = myentry.index('r')
+                except Exception:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

-        sublength = no_cfg
-        for j in range(T):
-            deltas[j].append(np.zeros(sublength))
+                if 'ens_name' in kwargs:
+                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
+                else:
+                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
+            # print(new_names)
+    idl = []
+    if not appended:
+        for i, item in enumerate(ls):
+            sub_ls = []
+            if "files" in kwargs:
+                sub_ls = kwargs.get("files")
+                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+            else:
+                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
+                    if compact:
+                        sub_ls.extend(filenames)
+                    else:
+                        sub_ls.extend(dirnames)
+                    break

-        for cfg in range(no_cfg):
-            with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
-                for k, line in enumerate(fp):
-                    if k == start_read - 5 - b2b:
-                        if line.strip() != 'name      ' + name:
-                            raise Exception('Wrong format', sub_ls[cfg])
-                    if(k >= start_read and k < start_read + T):
-                        floats = list(map(float, line.split()))
-                        deltas[k - start_read][i][cfg] = floats[-2:][im]
+                # print(sub_ls)
+                for exc in sub_ls:
+                    if compact:
+                        if not fnmatch.fnmatch(exc, prefix + '*'):
+                            sub_ls = list(set(sub_ls) - set([exc]))
+                        sub_ls.sort(key=lambda x:
+                                    int(re.findall(r'\d+', x)[-1]))
+                    else:
+                        if not fnmatch.fnmatch(exc, 'cfg*'):
+                            sub_ls = list(set(sub_ls) - set([exc]))
+                        sub_ls.sort(key=lambda x: int(x[3:]))
+            # print(sub_ls)
+            rep_idl = []
+            no_cfg = len(sub_ls)
+            for cfg in sub_ls:
+                try:
+                    if compact:
+                        rep_idl.append(int(cfg.split("n")[-1]))
+                    else:
+                        rep_idl.append(int(cfg[3:]))
+                except Exception:
+                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
+            rep_idl.sort()
+            # maybe there is a better way to print the idls
+            print(item, ':', no_cfg, ' configurations')
+            idl.append(rep_idl)
+        # here we have found all the files we need to look into.
+            if i == 0:
+                # here, we want to find the place within the file,
+                # where the correlator we need is stored.
+                if compact:
+                    # to do so, the pattern needed is put together
+                    # from the input values
+                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
+                    if b2b:
+                        pattern += '\nwf_2      ' + str(wf2)
+                    # and the file is parsed through to find the pattern
+                    with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
+                        content = file.read()
+                        match = re.search(pattern, content)
+                        if match:
+                            # the start and end point of the correlator
+                            # in quaetion is extracted for later use in
+                            # the other files
+                            start_read = content.count('\n', 0, match.start()) + 5 + b2b
+                            end_match = re.search(r'\n\s*\n', content[match.start():])
+                            T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
+                            assert T > 0
+                            print(T, 'entries, starting to read in line', start_read)
+                        else:
+                            raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
+                else:
+                    # this part does the same as above,
+                    # but for non-compactified versions of the files
+                    with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
+                        for k, line in enumerate(fp):
+                            if version == "0.0":
+                                # check if this is really the right file
+                                # by matching pattern similar to above
+                                pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
+                                # if b2b, a second wf is needed
+                                if b2b:
+                                    pattern += ", wf_2 " + str(wf2)
+                                qs = quarks.split(" ")
+                                pattern += " : " + qs[0] + " - " + qs[1]
+                                # print(pattern)
+                            if read == 1 and not line.strip() and k > start + 1:
+                                break
+                            if read == 1 and k >= start:
+                                T += 1

+                            if version == "0.0":
+                                if pattern in line:
+                                    # print(line)
+                                    read = 1
+                                    start = k + 1
+                            else:
+                                if '[correlator]' in line:
+                                    read = 1
+                                    start = k + 7 + b2b
+                                    T -= b2b
+                        print(str(T) + " entries found.")
+                # we found where the correlator
+                # that is to be read is in the files
+                # after preparing the datastructure
+                # the correlators get parsed into...
+                deltas = []
+                for j in range(T):
+                    deltas.append([])
+
+            for t in range(T):
+                deltas[t].append(np.zeros(no_cfg))
+            # ...the actual parsing can start.
+            # we iterate through all measurement files in the path given...
+            if compact:
+                for cfg in range(no_cfg):
+                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
+                        lines = fp.readlines()
+                        # check, if the correlator is in fact
+                        # printed completely
+                        if(start_read + T > len(lines)):
+                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
+                        # and start to read the correlator.
+                        # the range here is chosen like this,
+                        # since this allows for implementing
+                        # a security check for every read correlator later...
+                        for k in range(start_read - 6, start_read + T):
+                            if k == start_read - 5 - b2b:
+                                if lines[k].strip() != 'name      ' + name:
+                                    raise Exception('Wrong format',
+                                                    sub_ls[cfg])
+                            if(k >= start_read and k < start_read + T):
+                                floats = list(map(float, lines[k].split()))
+                                deltas[k - start_read][i][cfg] = floats[-2:][im]
+            else:
+                for cnfg, subitem in enumerate(sub_ls):
+                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
+                        # since the non-compatified files
+                        # are typically not so long,
+                        # we can iterate over the whole file.
+                        # here one can also implement the chekc from above.
+                        for k, line in enumerate(fp):
+                            if(k >= start and k < start + T):
+                                floats = list(map(float, line.split()))
+                                if version == "0.0":
+                                    deltas[k - start][i][cnfg] = floats[im]
+                                else:
+                                    deltas[k - start][i][cnfg] = floats[1 + im - single]
+
+    else:
+        if "files" in kwargs:
+            ls = kwargs.get("files")
+        else:
+            for exc in ls:
+                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
+                    ls = list(set(ls) - set([exc]))
+                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+        # print(ls)
+        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
+        if b2b:
+            pattern += '\nwf_2      ' + str(wf2)
+        for rep, file in enumerate(ls):
+            rep_idl = []
+            with open(path + '/' + file, 'r') as fp:
+                content = fp.readlines()
+                data_starts = []
+                for linenumber, line in enumerate(content):
+                    if "[run]" in line:
+                        data_starts.append(linenumber)
+                if len(set([data_starts[i] - data_starts[i - 1] for i in
+                            range(1, len(data_starts))])) > 1:
+                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
+                # first chunk of data
+                chunk = content[:data_starts[1]]
+                for linenumber, line in enumerate(chunk):
+                    if line.startswith("gauge_name"):
+                        gauge_line = linenumber
+                    elif line.startswith("[correlator]"):
+                        corr_line = linenumber
+                        found_pat = ""
+                        for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
+                            found_pat += li
+                        if re.search(pattern, found_pat):
+                            start_read = corr_line + 7 + b2b
+                            T = len(chunk) - 1 - start_read
+                if rep == 0:
+                    deltas = []
+                    for t in range(T):
+                        deltas.append([])
+                for t in range(T):
+                    deltas[t].append(np.zeros(len(data_starts)))
+                # all other chunks should follow the same structure
+                for cnfg in range(len(data_starts)):
+                    start = data_starts[cnfg]
+                    stop = start + data_starts[1]
+                    chunk = content[start:stop]
+                    # meta_data = {}
+                    try:
+                        rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
+                    except Exception:
+                        raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line)
+
+                    found_pat = ""
+                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
+                        found_pat += li
+                    if re.search(pattern, found_pat):
+                        for t, line in enumerate(chunk[start_read:start_read + T]):
+                            floats = list(map(float, line.split()))
+                            deltas[t][rep][cnfg] = floats[-2:][im]
+            idl.append(rep_idl)
+
+    if "check_configs" in kwargs:
+        print("Checking for missing configs...")
+        che = kwargs.get("check_configs")
+        if not (len(che) == len(idl)):
+            raise Exception("check_configs has to be the same length as replica!")
+        for r in range(len(idl)):
+            print("checking " + new_names[r])
+            utils.check_idl(idl[r], che[r])
+        print("Done")
    result = []
    for t in range(T):
-        result.append(Obs(deltas[t], new_names))
-    return result
-
-
-def read_qtop(path, prefix, **kwargs):
-    """Read qtop format from given folder structure.
-
-    Parameters
-    ----------
-    target -- specifies the topological sector to be reweighted to (default 0)
-    full -- if true read the charge instead of the reweighting factor.
-    """
-
-    if 'target' in kwargs:
-        target = kwargs.get('target')
-    else:
-        target = 0
-
-    if kwargs.get('full'):
-        full = 1
-    else:
-        full = 0
-
-    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(filenames)
-        break
-
-    if not ls:
-        raise Exception('Error, directory not found')
-
-    # Exclude files with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
-    replica = len(ls)
-    print('Read Q_top from', prefix[:-1], ',', replica, 'replica')
-
-    deltas = []
-
-    for rep in range(replica):
-        tmp = []
-        with open(path + '/' + ls[rep]) as fp:
-            for k, line in enumerate(fp):
-                floats = list(map(float, line.split()))
-                if full == 1:
-                    tmp.append(floats[1])
-                else:
-                    if int(floats[1]) == target:
-                        tmp.append(1.0)
-                    else:
-                        tmp.append(0.0)
-
-        deltas.append(np.array(tmp))
-
-    rep_names = []
-    for entry in ls:
-        truncated_entry = entry.split('.')[0]
-        idx = truncated_entry.index('r')
-        rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
-
-    result = Obs(deltas, rep_names)
-
+        result.append(Obs(deltas[t], new_names, idl=idl))
    return result
--- a/pyerrors/input/utils.py
+++ b/pyerrors/input/utils.py
@ -0,0 +1,15 @@
+"""Utilities for the input"""
+
+
+def check_idl(idl, che):
+    missing = []
+    for c in che:
+        if c not in idl:
+            missing.append(c)
+    # print missing such that it can directly be parsed to slurm terminal
+    if not (len(missing) == 0):
+        print(len(missing), "configs missing")
+        miss_str = str(missing[0])
+        for i in missing[1:]:
+            miss_str += "," + str(i)
+        print(miss_str)
--- a/tests/input_test.ipynb
+++ b/tests/input_test.ipynb
@ -0,0 +1,136 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This file is used for testing some of the input methods."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os,sys,inspect\n",
+    "current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))\n",
+    "parent_dir = os.path.dirname(current_dir)\n",
+    "sys.path.insert(0, parent_dir) \n",
+    "\n",
+    "import pyerrors as pe\n",
+    "import pyerrors.input.openQCD as qcdin\n",
+    "import pyerrors.input.sfcf as sfin\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "First, we will have a look at the input method for the topological charge $Q_{top}$, which is measured by the program ms from the openQCD package. For now, this part still in the making and depends on an actual file. Later, this should be changed to a more efficient way of making a proper input file.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['T29L20k0.13719r2.ms.dat', 'T29L20k0.13719r3.ms.dat', 'T29L20k0.13719r1.ms.dat', 'T29L20k0.13719r4.ms.dat']\n",
+      "dn: 10\n",
+      "nn: 60\n",
+      "tmax: 30\n",
+      "eps: 0.02\n",
+      "max_t: 12.0\n",
+      "t_aim: 6.125\n",
+      "index_aim: 31\n",
+      "T29L20k0.13719r2\n",
+      "dn: 10\n",
+      "nn: 60\n",
+      "tmax: 30\n",
+      "eps: 0.02\n",
+      "max_t: 12.0\n",
+      "t_aim: 6.125\n",
+      "index_aim: 31\n",
+      "T29L20k0.13719r3\n",
+      "dn: 10\n",
+      "nn: 60\n",
+      "tmax: 30\n",
+      "eps: 0.02\n",
+      "max_t: 12.0\n",
+      "t_aim: 6.125\n",
+      "index_aim: 31\n",
+      "T29L20k0.13719r1\n",
+      "dn: 10\n",
+      "nn: 60\n",
+      "tmax: 30\n",
+      "eps: 0.02\n",
+      "max_t: 12.0\n",
+      "t_aim: 6.125\n",
+      "index_aim: 31\n",
+      "T29L20k0.13719r4\n"
+     ]
+    }
+   ],
+   "source": [
+    "r_qtop = qcdin.read_qtop(\"../../test_data\", prefix = \"T29L20k0.13719\",full = True, r_stop = [500,440,447,410])#, files = [\"T29L20k0.13719r1.ms.dat\"], )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'T29L20k0.13719|r1': 500, 'T29L20k0.13719|r2': 440, 'T29L20k0.13719|r3': 447, 'T29L20k0.13719|r4': 410}\n",
+      "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -2 -2 -2 -2 -3 -3 -3 -3 -2 -2 -2 -2 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 "
+     ]
+    }
+   ],
+   "source": [
+    "print(r_qtop.shape)\n",
+    "#print(r_qtop.deltas['T29L20k0.13719|r1'])\n",
+    "for i in r_qtop.deltas['T29L20k0.13719|r2']:\n",
+    "    print(round(r_qtop.value + i), end =\" \")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "interpreter": {
+   "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+  },
+  "kernelspec": {
+   "display_name": "Python 3.9.7 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}