Merge pull request #32 from jkuhl-uni/feature/input_2.0

Feature/input 2.0
2026-03-19 09:01:28 +01:00 · 2022-01-16 16:43:55 +01:00 · 2022-01-16 16:43:55 +01:00 · c76e8b8bad
commit c76e8b8bad
parent 1765d4e6b6 dc6b844fa4
4 changed files with 777 additions and 270 deletions
--- a/pyerrors/input/openQCD.py
+++ b/pyerrors/input/openQCD.py
@ -39,13 +39,15 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):

    if not ls:
        raise Exception('Error, directory not found')
-
-    # Exclude files with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
+    if 'files' in kwargs:
+        ls = kwargs.get('files')
+    else:
+        # Exclude files with different names
+        for exc in ls:
+            if not fnmatch.fnmatch(exc, prefix + '*' + postfix + '.dat'):
+                ls = list(set(ls) - set([exc]))
+        if len(ls) > 1:
+            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)

    if 'r_start' in kwargs:
@ -64,7 +66,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
    else:
        r_stop = [None] * replica

-    print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')
+    print('Read reweighting factors from', prefix[:-1], ',',
+          replica, 'replica', end='')

    # Adjust replica names to new bookmarking system
    if names is None:
@ -94,7 +97,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                for k in range(nrw):
                    deltas.append([])
            else:
-                if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):  # little weird if-clause due to the /2 operation needed.
+                # little weird if-clause due to the /2 operation needed.
+                if ((nrw != struct.unpack('i', t)[0] and (not version == '2.0')) or (nrw != struct.unpack('i', t)[0] / 2 and version == '2.0')):
                    raise Exception('Error: different number of reweighting factors for replicum', rep)

            for k in range(nrw):
@ -106,7 +110,8 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                for i in range(nrw):
                    t = fp.read(4)
                    nfct.append(struct.unpack('i', t)[0])
-                # print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
+                # print('nfct: ', nfct) # Hasenbusch factor,
+                # 1 for rat reweighting
            else:
                for i in range(nrw):
                    nfct.append(1)
@ -135,8 +140,11 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                        for j in range(tmpd['n'][0]):
                            tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw[j])))
                            if print_err:
-                                print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw[j]))), np.std(np.exp(-np.asarray(tmp_rw[j]))))
-                                print('Sources:', np.exp(-np.asarray(tmp_rw[j])))
+                                print(config_no, i, j,
+                                      np.mean(np.exp(-np.asarray(tmp_rw[j]))),
+                                      np.std(np.exp(-np.asarray(tmp_rw[j]))))
+                                print('Sources:',
+                                      np.exp(-np.asarray(tmp_rw[j])))
                                print('Partial factor:', tmp_nfct)
                    elif version == '1.6' or version == '1.4':
                        tmp_nfct = 1.0
@ -146,7 +154,9 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
                            tmp_rw = struct.unpack('d' * nsrc[i], t)
                            tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
                            if print_err:
-                                print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
+                                print(config_no, i, j,
+                                      np.mean(np.exp(-np.asarray(tmp_rw))),
+                                      np.std(np.exp(-np.asarray(tmp_rw))))
                                print('Sources:', np.exp(-np.asarray(tmp_rw)))
                                print('Partial factor:', tmp_nfct)
                    tmp_array[i].append(tmp_nfct)
@ -165,11 +175,14 @@ def read_rwms(path, prefix, version='2.0', names=None, **kwargs):
    return result


-def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
+def extract_t0(path, prefix, dtr_read, xmin,
+               spatial_extent, fit_range=5, **kwargs):
    """Extract t0 from given .ms.dat files. Returns t0 as Obs.

-    It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
-    The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
+    It is assumed that all boundary effects have
+    sufficiently decayed at x0=xmin.
+    The data around the zero crossing of t^2<E> - 0.3
+    is fitted with a linear function
    from which the exact root is extracted.
    Only works with openQCD v 1.2.

@ -180,14 +193,17 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
    prefix : str
        Ensemble prefix
    dtr_read : int
-        Determines how many trajectories should be skipped when reading the ms.dat files.
+        Determines how many trajectories should be skipped
+        when reading the ms.dat files.
        Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
    xmin : int
-        First timeslice where the boundary effects have sufficiently decayed.
+        First timeslice where the boundary
+        effects have sufficiently decayed.
    spatial_extent : int
        spatial extent of the lattice, required for normalization.
    fit_range : int
-        Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
+        Number of data points left and right of the zero
+        crossing to be included in the linear fit. (Default: 5)
    r_start : list
        list which contains the first config to be read for each replicum.
    r_stop: list
@ -273,7 +289,9 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar

        Ysum.append([])
        for i, item in enumerate(Ysl):
-            Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])
+            Ysum[-1].append([np.mean(item[current + xmin:
+                             current + tmax - xmin])
+                            for current in range(0, len(item), tmax)])

    t2E_dict = {}
    for n in range(nn + 1):
@ -286,10 +304,13 @@ def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwar
        new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
        t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3

-    zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0)
+    zero_crossing = np.argmax(np.array(
+        [o.value for o in t2E_dict.values()]) > 0.0)

-    x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range]
-    y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range]
+    x = list(t2E_dict.keys())[zero_crossing - fit_range:
+                              zero_crossing + fit_range]
+    y = list(t2E_dict.values())[zero_crossing - fit_range:
+                                zero_crossing + fit_range]
    [o.gamma_method() for o in y]

    fit_result = fit_lin(x, y)
@ -343,3 +364,243 @@ def _read_array_openQCD2(fp):

    arr = _parse_array_openQCD2(d, n, size, tmp, quadrupel=True)
    return {'d': d, 'n': n, 'size': size, 'arr': arr}
+
+
+def read_qtop(path, prefix, c, dtr_cnfg=1, version="1.2", **kwargs):
+    """Read qtop format from given folder structure.
+
+    Parameters
+    ----------
+    path:
+        path of the measurement files
+    prefix:
+        prefix of the measurement files, e.g. <prefix>_id0_r0.ms.dat
+    c: double
+        Smearing radius in units of the lattice extent, c = sqrt(8 t0) / L
+    dtr_cnfg: int
+        (optional) parameter that specifies the number of trajectories
+        between two configs.
+        if it is not set, the distance between two measurements
+        in the file is assumed to be
+        the distance between two configurations.
+    steps: int
+        (optional) (maybe only necessary for openQCD2.0)
+        nt step size, guessed if not given
+    version: str
+        version string of the openQCD (sfqcd) version used to create
+        the ensemble
+    L: int
+        spatial length of the lattice in L/a.
+        HAS to be set if version != sfqcd, since openQCD does not provide
+        this in the header
+    r_start: list
+        offset of the first ensemble, making it easier to match
+        later on with other Obs
+    r_stop: list
+        last configurations that need to be read (per replicum)
+    files: list
+        specify the exact files that need to be read
+        from path, practical if e.g. only one replicum is needed
+    names: list
+        Alternative labeling for replicas/ensembles.
+        Has to have the appropriate length
+    """
+    # one could read L from the header in case of sfQCD
+    # c = 0.35
+    known_versions = ["1.0", "1.2", "1.4", "1.6", "2.0", "sfqcd"]
+
+    if version not in known_versions:
+        raise Exception("Unknown openQCD version.")
+    if "steps" in kwargs:
+        steps = kwargs.get("steps")
+    if version == "sfqcd":
+        if "L" in kwargs:
+            supposed_L = kwargs.get("L")
+    else:
+        if "L" not in kwargs:
+            raise Exception("This version of openQCD needs you to provide the spatial length of the lattice as parameter 'L'.")
+        else:
+            L = kwargs.get("L")
+    r_start = 1
+    if "r_start" in kwargs:
+        r_start = kwargs.get("r_start")
+    if "r_stop" in kwargs:
+        r_stop = kwargs.get("r_stop")
+    # if one wants to read specific files with this method...
+    if "files" in kwargs:
+        files = kwargs.get("files")
+    else:
+        # find files in path
+        found = []
+        files = []
+        for (dirpath, dirnames, filenames) in os.walk(path + "/"):
+            # print(filenames)
+            found.extend(filenames)
+            break
+        for f in found:
+            if fnmatch.fnmatch(f, prefix + "*" + ".ms.dat"):
+                files.append(f)
+        print(files)
+    # now that we found our files, we decipher them...
+    rep_names = []
+
+    deltas = []
+    idl = []
+    for rep, file in enumerate(files):
+        with open(path + "/" + file, "rb") as fp:
+            # header
+            t = fp.read(12)
+            header = struct.unpack('<iii', t)
+            # step size in integration steps "dnms"
+            dn = header[0]
+            # number of measurements, so "ntot"/dn
+            nn = header[1]
+            # lattice T/a
+            tmax = header[2]
+            if version == "sfqcd":
+                t = fp.read(12)
+                Ls = struct.unpack('<iii', t)
+                if(Ls[0] == Ls[1] and Ls[1] == Ls[2]):
+                    L = Ls[0]
+                    if not (supposed_L == L):
+                        raise Exception("It seems the length given in the header and by you contradict each other")
+                else:
+                    raise Exception("Found more than one spatial length in header!")
+
+            print('dnms:', dn)
+            print('nn:', nn)
+            print('tmax:', tmax)
+            t = fp.read(8)
+            eps = struct.unpack('d', t)[0]
+            print('eps:', eps)
+
+            Q = []
+            ncs = []
+            while 0 < 1:
+                # int nt
+                t = fp.read(4)
+                if(len(t) < 4):
+                    break
+                ncs.append(struct.unpack('i', t)[0])
+                # Wsl
+                t = fp.read(8 * tmax * (nn + 1))
+                # Ysl
+                t = fp.read(8 * tmax * (nn + 1))
+                # Qsl, which is asked for in this method
+                t = fp.read(8 * tmax * (nn + 1))
+                # unpack the array of Qtops,
+                # on each timeslice t=0,...,tmax-1 and the
+                # measurement number in = 0...nn (see README.qcd1)
+                tmpd = struct.unpack('d' * tmax * (nn + 1), t)
+                Q.append(tmpd)
+
+        if not len(set([ncs[i] - ncs[i - 1] for i in range(1, len(ncs))])):
+            raise Exception("Irregularities in stepsize found")
+        else:
+            if 'steps' in kwargs:
+                if steps != ncs[1] - ncs[0]:
+                    raise Exception("steps and the found stepsize are not the same")
+            else:
+                steps = ncs[1] - ncs[0]
+
+        print(len(Q))
+        print('max_t:', dn * (nn) * eps)
+
+        t_aim = (c * L) ** 2 / 8
+
+        print('t_aim:', t_aim)
+        index_aim = round(t_aim / eps / dn)
+        print('index_aim:', index_aim)
+
+        Q_sum = []
+        for i, item in enumerate(Q):
+            Q_sum.append([sum(item[current:current + tmax])
+                         for current in range(0, len(item), tmax)])
+        print(len(Q_sum))
+        print(len(Q_sum[0]))
+        Q_round = []
+        for i in range(len(Q) // dtr_cnfg):
+            Q_round.append(round(Q_sum[dtr_cnfg * i][index_aim]))
+        if len(Q_round) != len(ncs) // dtr_cnfg:
+            raise Exception("qtops and ncs dont have the same length")
+
+        # replica = len(files)
+
+        truncated_file = file[:-7]
+        print(truncated_file)
+        idl_start = 1
+
+        if "r_start" in kwargs:
+            Q_round = Q_round[r_start[rep]:]
+            idl_start = r_start[rep]
+        if "r_stop" in kwargs:
+            Q_round = Q_round[:r_stop[rep]]
+        idl_stop = idl_start + len(Q_round)
+        # keyword "names" prevails over "ens_name"
+        if "names" not in kwargs:
+            try:
+                idx = truncated_file.index('r')
+            except Exception:
+                if "names" not in kwargs:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
+            if "ens_name" in kwargs:
+                ens_name = kwargs.get("ens_name")
+            else:
+                ens_name = truncated_file[:idx]
+            rep_names.append(ens_name + '|' + truncated_file[idx:])
+        else:
+            names = kwargs.get("names")
+            rep_names = names
+        deltas.append(np.array(Q_round))
+        idl.append(range(idl_start, idl_stop))
+    # print(idl)
+    result = Obs(deltas, rep_names, idl=idl)
+    return result
+
+
+def read_qtop_sector(target=0, **kwargs):
+    """target: int
+            specifies the topological sector to be reweighted to (default 0)
+        q_top: Obs
+        alternatively takes args of read_qtop method as kwargs
+    """
+    if "q_top" in kwargs:
+        qtop = kwargs.get("q_top")
+    else:
+        if "path" in kwargs:
+            path = kwargs.get("path")
+            del kwargs["path"]
+        else:
+            raise Exception("If you are not providing q_top, please provide path")
+        if "prefix" in kwargs:
+            prefix = kwargs.get("prefix")
+            del kwargs["prefix"]
+        else:
+            raise Exception("If you are not providing q_top, please provide prefix")
+        if "c" in kwargs:
+            c = kwargs.get("c")
+            del kwargs["c"]
+        else:
+            raise Exception("If you are not providing q_top, please provide c")
+        if "version" in kwargs:
+            version = kwargs.get("version")
+            del kwargs["version"]
+        else:
+            version = "1.2"
+        if "dtr_cnfg" in kwargs:
+            dtr_cnfg = kwargs.get("dtr_cnfg")
+            del kwargs["dtr_cnfg"]
+        else:
+            dtr_cnfg = 1
+        qtop = read_qtop(path, prefix, c, dtr_cnfg=dtr_cnfg,
+                         version=version, **kwargs)
+    # unpack to original values, project onto target sector
+    names = qtop.names
+    print(names)
+    print(qtop.deltas.keys())
+    proj_qtop = []
+    for n in qtop.deltas:
+        proj_qtop.append(np.array([1 if int(qtop.value + q) == target else 0 for q in qtop.deltas[n]]))
+
+    result = Obs(proj_qtop, qtop.names)
+    return result
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@ -6,17 +6,57 @@ import fnmatch
 import re
 import numpy as np  # Thinly-wrapped numpy
 from ..obs import Obs
+from . import utils


-def read_sfcf(path, prefix, name, **kwargs):
-    """Read sfcf C format from given folder structure.
+def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
+              version="1.0c", **kwargs):
+    """Read sfcf c format from given folder structure.

    Parameters
    ----------
-    im -- if True, read imaginary instead of real part of the correlation function.
-    single -- if True, read a boundary-to-boundary correlation function with a single value
-    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
-    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
+    quarks: str
+        Label of the quarks used in the sfcf input file. e.g. "quark quark"
+        for version 0.0 this does NOT need to be given with the typical " - "
+        that is present in the output file,
+        this is done automatically for this version
+    noffset: int
+        Offset of the source (only relevant when wavefunctions are used)
+    wf: int
+        ID of wave function
+    wf2: int
+        ID of the second wavefunction
+        (only relevant for boundary-to-boundary correlation functions)
+    im: bool
+        if True, read imaginary instead of real part
+        of the correlation function.
+    b2b: bool
+        if True, read a time-dependent boundary-to-boundary
+        correlation function
+    single: bool
+        if True, read time independent boundary to boundary
+        correlation function
+    names: list
+        Alternative labeling for replicas/ensembles.
+        Has to have the appropriate length
+    ens_name : str
+        replaces the name of the ensemble
+    version: str
+        version of SFCF, with which the measurement was done.
+        if the compact output option (-c) was specified,
+        append a "c" to the version (e.g. "1.0c")
+        if the append output option (-a) was specified,
+        append an "a" to the version
+    replica: list
+        list of replica to be read, default is all
+    files: list
+        list of files to be read per replica, default is all.
+        for non-compact output format, hand the folders to be read here.
+    check_configs:
+        list of list of supposed configs, e.g. [range(1, 1001)]
+        for one replicum with 1000 configs
+    TODO:
+    - whats going on with files here?
    """
    if kwargs.get('im'):
        im = 1
@ -29,266 +69,321 @@ def read_sfcf(path, prefix, name, **kwargs):
        b2b = 1
        single = 1
    else:
-        b2b = 0
+        if kwargs.get('b2b'):
+            b2b = 1
+        else:
+            b2b = 0
        single = 0
+    if "replica" in kwargs:
+        reps = kwargs.get("replica")

-    if kwargs.get('b2b'):
-        b2b = 1
+    # due to higher usage in current projects,
+    # compact file format is default
+    compact = True
+    appended = False
+    # get version string
+    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]

+    if version not in known_versions:
+        raise Exception("This version is not known!")
+    # if the letter c is appended to the version,
+    # the compact fileformat is used (former read_sfcf_c)
+    if(version[-1] == "c"):
+        appended = False
+        compact = True
+        version = version[:-1]
+    elif(version[-1] == "a"):
+        appended = True
+        compact = False
+        version = version[:-1]
+    else:
+        compact = False
+        appended = False
    read = 0
    T = 0
    start = 0
    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(dirnames)
-        break
-    if not ls:
-        raise Exception('Error, directory not found')
-    for exc in ls:
-        if fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set(exc))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
-    replica = len(ls)
-    print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
-    if 'names' in kwargs:
-        new_names = kwargs.get('names')
-        if len(new_names) != replica:
-            raise Exception('Names does not have the required length', replica)
+    if "replica" in kwargs:
+        ls = reps
    else:
-        # Adjust replica names to new bookmarking system
-        new_names = []
-        for entry in ls:
-            idx = entry.index('r')
-            new_names.append(entry[:idx] + '|' + entry[idx:])
-
-    print(replica, 'replica')
-    for i, item in enumerate(ls):
-        print(item)
-        sub_ls = []
-        for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
-            sub_ls.extend(dirnames)
-            break
-        for exc in sub_ls:
-            if fnmatch.fnmatch(exc, 'cfg*'):
-                sub_ls = list(set(sub_ls) - set(exc))
-        sub_ls.sort(key=lambda x: int(x[3:]))
-        no_cfg = len(sub_ls)
-        print(no_cfg, 'configurations')
-
-        if i == 0:
-            with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
-                for k, line in enumerate(fp):
-                    if read == 1 and not line.strip() and k > start + 1:
-                        break
-                    if read == 1 and k >= start:
-                        T += 1
-                    if '[correlator]' in line:
-                        read = 1
-                        start = k + 7 + b2b
-                        T -= b2b
-
-            deltas = []
-            for j in range(T):
-                deltas.append([])
-
-        sublength = len(sub_ls)
-        for j in range(T):
-            deltas[j].append(np.zeros(sublength))
-
-        for cnfg, subitem in enumerate(sub_ls):
-            with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
-                for k, line in enumerate(fp):
-                    if(k >= start and k < start + T):
-                        floats = list(map(float, line.split()))
-                        deltas[k - start][i][cnfg] = floats[1 + im - single]
-
-    result = []
-    for t in range(T):
-        result.append(Obs(deltas[t], new_names))
-
-    return result
-
-
-def read_sfcf_c(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
-    """Read sfcf c format from given folder structure.
-
-    Parameters
-    ----------
-    quarks -- Label of the quarks used in the sfcf input file
-    noffset -- Offset of the source (only relevant when wavefunctions are used)
-    wf -- ID of wave function
-    wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
-    im -- if True, read imaginary instead of real part of the correlation function.
-    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
-    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
-    ens_name : str
-        replaces the name of the ensemble
-    """
-
-    if kwargs.get('im'):
-        im = 1
-        part = 'imaginary'
-    else:
-        im = 0
-        part = 'real'
-
-    if kwargs.get('b2b'):
-        b2b = 1
-    else:
-        b2b = 0
-
-    T = 0
-    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(dirnames)
-        break
-    if not ls:
-        raise Exception('Error, directory not found')
-    # Exclude folders with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
-    replica = len(ls)
-    if 'names' in kwargs:
-        new_names = kwargs.get('names')
-        if len(new_names) != replica:
-            raise Exception('Names does not have the required length', replica)
-    else:
-        # Adjust replica names to new bookmarking system
-        new_names = []
-        for entry in ls:
-            idx = entry.index('r')
-            if 'ens_name' in kwargs:
-                new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
+        for (dirpath, dirnames, filenames) in os.walk(path):
+            if not appended:
+                ls.extend(dirnames)
            else:
-                new_names.append(entry[:idx] + '|' + entry[idx:])
-
-    print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
-    for i, item in enumerate(ls):
-        sub_ls = []
-        for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
-            sub_ls.extend(filenames)
+                ls.extend(filenames)
            break
-        for exc in sub_ls:
+        if not ls:
+            raise Exception('Error, directory not found')
+        # Exclude folders with different names
+        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*'):
-                sub_ls = list(set(sub_ls) - set([exc]))
-        sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+                ls = list(set(ls) - set([exc]))
+    if len(ls) > 1:
+        # New version, to cope with ids, etc.
+        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))

-        first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
+    if not appended:
+        replica = len(ls)
+    else:
+        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
+    print('Read', part, 'part of', name, 'from', prefix[:-1],
+          ',', replica, 'replica')
+    if 'names' in kwargs:
+        new_names = kwargs.get('names')
+        if len(new_names) != len(set(new_names)):
+            raise Exception("names are not unique!")
+        if len(new_names) != replica:
+            raise Exception('Names does not have the required length', replica)
+    else:
+        # Adjust replica names to new bookmarking system

-        last_cfg = len(sub_ls) + first_cfg - 1
+        new_names = []
+        if not appended:
+            for entry in ls:
+                try:
+                    idx = entry.index('r')
+                except Exception:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

-        for cfg in range(1, len(sub_ls)):
-            if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
-                last_cfg = cfg + first_cfg - 1
-                break
-
-        no_cfg = last_cfg - first_cfg + 1
-        print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
-
-        if i == 0:
-            pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
-            if b2b:
-                pattern += '\nwf_2      ' + str(wf2)
-
-            with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
-                content = file.read()
-                match = re.search(pattern, content)
-                if match:
-                    start_read = content.count('\n', 0, match.start()) + 5 + b2b
-                    end_match = re.search(r'\n\s*\n', content[match.start():])
-                    T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
-                    assert T > 0
-                    print(T, 'entries, starting to read in line', start_read)
+                if 'ens_name' in kwargs:
+                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
                else:
-                    raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
+                    new_names.append(entry[:idx] + '|' + entry[idx:])
+        else:

-            deltas = []
-            for j in range(T):
-                deltas.append([])
+            for exc in ls:
+                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
+                    ls = list(set(ls) - set([exc]))
+            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+            for entry in ls:
+                myentry = entry[:-len(name) - 1]
+                # print(myentry)
+                try:
+                    idx = myentry.index('r')
+                except Exception:
+                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")

-        sublength = no_cfg
-        for j in range(T):
-            deltas[j].append(np.zeros(sublength))
+                if 'ens_name' in kwargs:
+                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
+                else:
+                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
+            # print(new_names)
+    idl = []
+    if not appended:
+        for i, item in enumerate(ls):
+            sub_ls = []
+            if "files" in kwargs:
+                sub_ls = kwargs.get("files")
+                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+            else:
+                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
+                    if compact:
+                        sub_ls.extend(filenames)
+                    else:
+                        sub_ls.extend(dirnames)
+                    break

-        for cfg in range(no_cfg):
-            with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
-                for k, line in enumerate(fp):
-                    if k == start_read - 5 - b2b:
-                        if line.strip() != 'name      ' + name:
-                            raise Exception('Wrong format', sub_ls[cfg])
-                    if(k >= start_read and k < start_read + T):
-                        floats = list(map(float, line.split()))
-                        deltas[k - start_read][i][cfg] = floats[-2:][im]
+                # print(sub_ls)
+                for exc in sub_ls:
+                    if compact:
+                        if not fnmatch.fnmatch(exc, prefix + '*'):
+                            sub_ls = list(set(sub_ls) - set([exc]))
+                        sub_ls.sort(key=lambda x:
+                                    int(re.findall(r'\d+', x)[-1]))
+                    else:
+                        if not fnmatch.fnmatch(exc, 'cfg*'):
+                            sub_ls = list(set(sub_ls) - set([exc]))
+                        sub_ls.sort(key=lambda x: int(x[3:]))
+            # print(sub_ls)
+            rep_idl = []
+            no_cfg = len(sub_ls)
+            for cfg in sub_ls:
+                try:
+                    if compact:
+                        rep_idl.append(int(cfg.split("n")[-1]))
+                    else:
+                        rep_idl.append(int(cfg[3:]))
+                except Exception:
+                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
+            rep_idl.sort()
+            # maybe there is a better way to print the idls
+            print(item, ':', no_cfg, ' configurations')
+            idl.append(rep_idl)
+        # here we have found all the files we need to look into.
+            if i == 0:
+                # here, we want to find the place within the file,
+                # where the correlator we need is stored.
+                if compact:
+                    # to do so, the pattern needed is put together
+                    # from the input values
+                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
+                    if b2b:
+                        pattern += '\nwf_2      ' + str(wf2)
+                    # and the file is parsed through to find the pattern
+                    with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
+                        content = file.read()
+                        match = re.search(pattern, content)
+                        if match:
+                            # the start and end point of the correlator
+                            # in question are extracted for later use in
+                            # the other files
+                            start_read = content.count('\n', 0, match.start()) + 5 + b2b
+                            end_match = re.search(r'\n\s*\n', content[match.start():])
+                            T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
+                            assert T > 0
+                            print(T, 'entries, starting to read in line', start_read)
+                        else:
+                            raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
+                else:
+                    # this part does the same as above,
+                    # but for non-compactified versions of the files
+                    with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
+                        for k, line in enumerate(fp):
+                            if version == "0.0":
+                                # check if this is really the right file
+                                # by matching pattern similar to above
+                                pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
+                                # if b2b, a second wf is needed
+                                if b2b:
+                                    pattern += ", wf_2 " + str(wf2)
+                                qs = quarks.split(" ")
+                                pattern += " : " + qs[0] + " - " + qs[1]
+                                # print(pattern)
+                            if read == 1 and not line.strip() and k > start + 1:
+                                break
+                            if read == 1 and k >= start:
+                                T += 1

+                            if version == "0.0":
+                                if pattern in line:
+                                    # print(line)
+                                    read = 1
+                                    start = k + 1
+                            else:
+                                if '[correlator]' in line:
+                                    read = 1
+                                    start = k + 7 + b2b
+                                    T -= b2b
+                        print(str(T) + " entries found.")
+                # we found where the correlator
+                # that is to be read is in the files
+                # after preparing the datastructure
+                # the correlators get parsed into...
+                deltas = []
+                for j in range(T):
+                    deltas.append([])
+
+            for t in range(T):
+                deltas[t].append(np.zeros(no_cfg))
+            # ...the actual parsing can start.
+            # we iterate through all measurement files in the path given...
+            if compact:
+                for cfg in range(no_cfg):
+                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
+                        lines = fp.readlines()
+                        # check, if the correlator is in fact
+                        # printed completely
+                        if(start_read + T > len(lines)):
+                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
+                        # and start to read the correlator.
+                        # the range here is chosen like this,
+                        # since this allows for implementing
+                        # a security check for every read correlator later...
+                        for k in range(start_read - 6, start_read + T):
+                            if k == start_read - 5 - b2b:
+                                if lines[k].strip() != 'name      ' + name:
+                                    raise Exception('Wrong format',
+                                                    sub_ls[cfg])
+                            if(k >= start_read and k < start_read + T):
+                                floats = list(map(float, lines[k].split()))
+                                deltas[k - start_read][i][cfg] = floats[-2:][im]
+            else:
+                for cnfg, subitem in enumerate(sub_ls):
+                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
+                        # since the non-compactified files
+                        # are typically not so long,
+                        # we can iterate over the whole file.
+                        # here one can also implement the check from above.
+                        for k, line in enumerate(fp):
+                            if(k >= start and k < start + T):
+                                floats = list(map(float, line.split()))
+                                if version == "0.0":
+                                    deltas[k - start][i][cnfg] = floats[im]
+                                else:
+                                    deltas[k - start][i][cnfg] = floats[1 + im - single]
+
+    else:
+        if "files" in kwargs:
+            ls = kwargs.get("files")
+        else:
+            for exc in ls:
+                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
+                    ls = list(set(ls) - set([exc]))
+                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
+        # print(ls)
+        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
+        if b2b:
+            pattern += '\nwf_2      ' + str(wf2)
+        for rep, file in enumerate(ls):
+            rep_idl = []
+            with open(path + '/' + file, 'r') as fp:
+                content = fp.readlines()
+                data_starts = []
+                for linenumber, line in enumerate(content):
+                    if "[run]" in line:
+                        data_starts.append(linenumber)
+                if len(set([data_starts[i] - data_starts[i - 1] for i in
+                            range(1, len(data_starts))])) > 1:
+                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
+                # first chunk of data
+                chunk = content[:data_starts[1]]
+                for linenumber, line in enumerate(chunk):
+                    if line.startswith("gauge_name"):
+                        gauge_line = linenumber
+                    elif line.startswith("[correlator]"):
+                        corr_line = linenumber
+                        found_pat = ""
+                        for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
+                            found_pat += li
+                        if re.search(pattern, found_pat):
+                            start_read = corr_line + 7 + b2b
+                            T = len(chunk) - 1 - start_read
+                if rep == 0:
+                    deltas = []
+                    for t in range(T):
+                        deltas.append([])
+                for t in range(T):
+                    deltas[t].append(np.zeros(len(data_starts)))
+                # all other chunks should follow the same structure
+                for cnfg in range(len(data_starts)):
+                    start = data_starts[cnfg]
+                    stop = start + data_starts[1]
+                    chunk = content[start:stop]
+                    # meta_data = {}
+                    try:
+                        rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
+                    except Exception:
+                        raise Exception("Couldn't parse idl from directroy, problem with chunk around line " + gauge_line)
+
+                    found_pat = ""
+                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
+                        found_pat += li
+                    if re.search(pattern, found_pat):
+                        for t, line in enumerate(chunk[start_read:start_read + T]):
+                            floats = list(map(float, line.split()))
+                            deltas[t][rep][cnfg] = floats[-2:][im]
+            idl.append(rep_idl)
+
+    if "check_configs" in kwargs:
+        print("Checking for missing configs...")
+        che = kwargs.get("check_configs")
+        if not (len(che) == len(idl)):
+            raise Exception("check_configs has to be the same length as replica!")
+        for r in range(len(idl)):
+            print("checking " + new_names[r])
+            utils.check_idl(idl[r], che[r])
+        print("Done")
    result = []
    for t in range(T):
-        result.append(Obs(deltas[t], new_names))
-    return result
-
-
-def read_qtop(path, prefix, **kwargs):
-    """Read qtop format from given folder structure.
-
-    Parameters
-    ----------
-    target -- specifies the topological sector to be reweighted to (default 0)
-    full -- if true read the charge instead of the reweighting factor.
-    """
-
-    if 'target' in kwargs:
-        target = kwargs.get('target')
-    else:
-        target = 0
-
-    if kwargs.get('full'):
-        full = 1
-    else:
-        full = 0
-
-    ls = []
-    for (dirpath, dirnames, filenames) in os.walk(path):
-        ls.extend(filenames)
-        break
-
-    if not ls:
-        raise Exception('Error, directory not found')
-
-    # Exclude files with different names
-    for exc in ls:
-        if not fnmatch.fnmatch(exc, prefix + '*'):
-            ls = list(set(ls) - set([exc]))
-    if len(ls) > 1:
-        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
-    replica = len(ls)
-    print('Read Q_top from', prefix[:-1], ',', replica, 'replica')
-
-    deltas = []
-
-    for rep in range(replica):
-        tmp = []
-        with open(path + '/' + ls[rep]) as fp:
-            for k, line in enumerate(fp):
-                floats = list(map(float, line.split()))
-                if full == 1:
-                    tmp.append(floats[1])
-                else:
-                    if int(floats[1]) == target:
-                        tmp.append(1.0)
-                    else:
-                        tmp.append(0.0)
-
-        deltas.append(np.array(tmp))
-
-    rep_names = []
-    for entry in ls:
-        truncated_entry = entry.split('.')[0]
-        idx = truncated_entry.index('r')
-        rep_names.append(truncated_entry[:idx] + '|' + truncated_entry[idx:])
-
-    result = Obs(deltas, rep_names)
-
+        result.append(Obs(deltas[t], new_names, idl=idl))
    return result
--- a/pyerrors/input/utils.py
+++ b/pyerrors/input/utils.py
@ -0,0 +1,15 @@
+"""Utilities for the input"""
+
+
+def check_idl(idl, che):
+    """Print the entries of `che` that are missing from `idl`.
+
+    idl -- collection that is searched for the expected entries.
+    che -- entries that are expected to be contained in `idl`.
+    """
+    missing = [cfg for cfg in che if cfg not in idl]
+    if missing:
+        print(len(missing), "configs missing")
+        # comma-separated without blanks, such that the line can
+        # directly be parsed to a slurm terminal
+        print(",".join(str(cfg) for cfg in missing))