refactor: unnecessary comments in input/sfcf removed, typos corrected

2026-01-04 21:51:50 +01:00 · 2022-01-16 16:50:51 +01:00 · 2022-01-16 16:50:51 +01:00 · f8cbaef626
commit f8cbaef626
parent b0bc9c8e47
1 changed files with 5 additions and 43 deletions
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@ -43,15 +43,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
        replaces the name of the ensemble
    version: str
        version of SFCF, with which the measurement was done.
-        if the compact output option (-c) was spectified,
+        if the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c")
        if the append output option (-a) was specified,
-        append an "a" to the version
+        append an "a" to the version. Currently supported versions
        are "0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a" and "2.0a".
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all.
-        for non-conpact ouztput format, hand the folders to be read here.
+        for non-compact output format, hand the folders to be read here.
    check_configs:
        list of lists of supposed configs, e.g. [range(1, 1001)]
        for one replicum with 1000 configs
@ -77,17 +78,12 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
    if "replica" in kwargs:
        reps = kwargs.get("replica")
    # due to higher usage in current projects,
    # compact file format is default
    compact = True
    appended = False
    # get version string
    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
    if version not in known_versions:
        raise Exception("This version is not known!")
    # if the letter c is appended to the version,
    # the compact fileformat is used (former read_sfcf_c)
    if(version[-1] == "c"):
        appended = False
        compact = True
@ -119,7 +115,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
            if not fnmatch.fnmatch(exc, prefix + '*'):
                ls = list(set(ls) - set([exc]))
    if len(ls) > 1:
        # New version, to cope with ids, etc.
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    if not appended:
@ -135,8 +130,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
        # Adjust replica names to new bookmarking system
        new_names = []
        if not appended:
            for entry in ls:
@ -157,7 +150,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            for entry in ls:
                myentry = entry[:-len(name) - 1]
                # print(myentry)
                try:
                    idx = myentry.index('r')
                except Exception:
@ -167,7 +159,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
                else:
                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
            # print(new_names)
    idl = []
    if not appended:
        for i, item in enumerate(ls):
@ -183,7 +174,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                        sub_ls.extend(dirnames)
                    break
                # print(sub_ls)
                for exc in sub_ls:
                    if compact:
                        if not fnmatch.fnmatch(exc, prefix + '*'):
@ -194,7 +184,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                        if not fnmatch.fnmatch(exc, 'cfg*'):
                            sub_ls = list(set(sub_ls) - set([exc]))
                        sub_ls.sort(key=lambda x: int(x[3:]))
            # print(sub_ls)
            rep_idl = []
            no_cfg = len(sub_ls)
            for cfg in sub_ls:
@ -206,26 +195,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                except Exception:
                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
            rep_idl.sort()
            # maybe there is a better way to print the idls
            print(item, ':', no_cfg, ' configurations')
            idl.append(rep_idl)
        # here we have found all the files we need to look into.
            if i == 0:
                # here, we want to find the place within the file,
                # where the correlator we need is stored.
                if compact:
                    # to do so, the pattern needed is put together
                    # from the input values
                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
                    if b2b:
                        pattern += '\nwf_2      ' + str(wf2)
                    # and the file is parsed through to find the pattern
                    with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
                        content = file.read()
                        match = re.search(pattern, content)
                        if match:
                            # the start and end point of the correlator
-                            # in quaetion is extracted for later use in
+                            # in question is extracted for later use in
                            # the other files
                            start_read = content.count('\n', 0, match.start()) + 5 + b2b
                            end_match = re.search(r'\n\s*\n', content[match.start():])
@ -248,7 +230,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                                    pattern += ", wf_2 " + str(wf2)
                                qs = quarks.split(" ")
                                pattern += " : " + qs[0] + " - " + qs[1]
                                # print(pattern)
                            if read == 1 and not line.strip() and k > start + 1:
                                break
                            if read == 1 and k >= start:
@ -265,30 +246,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                                    start = k + 7 + b2b
                                    T -= b2b
                        print(str(T) + " entries found.")
                # we found where the correlator
                # that is to be read is in the files
                # after preparing the datastructure
                # the correlators get parsed into...
                deltas = []
                for j in range(T):
                    deltas.append([])
            for t in range(T):
                deltas[t].append(np.zeros(no_cfg))
            # ...the actual parsing can start.
            # we iterate through all measurement files in the path given...
            if compact:
                for cfg in range(no_cfg):
                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                        lines = fp.readlines()
                        # check, if the correlator is in fact
                        # printed completely
                        if(start_read + T > len(lines)):
                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
                        # and start to read the correlator.
                        # the range here is chosen like this,
                        # since this allows for implementing
                        # a security check for every read correlator later...
                        for k in range(start_read - 6, start_read + T):
                            if k == start_read - 5 - b2b:
                                if lines[k].strip() != 'name      ' + name:
@ -300,10 +270,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
            else:
                for cnfg, subitem in enumerate(sub_ls):
                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
                        # since the non-compact files
                        # are typically not so long,
                        # we can iterate over the whole file.
                        # here one can also implement the check from above.
                        for k, line in enumerate(fp):
                            if(k >= start and k < start + T):
                                floats = list(map(float, line.split()))
@ -320,7 +286,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                    ls = list(set(ls) - set([exc]))
                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
        # print(ls)
        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
        if b2b:
            pattern += '\nwf_2      ' + str(wf2)
@ -335,7 +300,6 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                if len(set([data_starts[i] - data_starts[i - 1] for i in
                            range(1, len(data_starts))])) > 1:
                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
                # first chunk of data
                chunk = content[:data_starts[1]]
                for linenumber, line in enumerate(chunk):
                    if line.startswith("gauge_name"):
@ -354,12 +318,10 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
                        deltas.append([])
                for t in range(T):
                    deltas[t].append(np.zeros(len(data_starts)))
                # all other chunks should follow the same structure
                for cnfg in range(len(data_starts)):
                    start = data_starts[cnfg]
                    stop = start + data_starts[1]
                    chunk = content[start:stop]
                    # meta_data = {}
                    try:
                        rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
                    except Exception: