flake8 compliance

This commit is contained in:
jkuhl-uni 2022-01-14 16:00:40 +01:00
parent 53f727092d
commit 302a7ae439


@@ -8,39 +8,55 @@ import numpy as np # Thinly-wrapped numpy
from ..obs import Obs
from . import utils
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version = "1.0c", **kwargs):
def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0,
version="1.0c", **kwargs):
"""Read sfcf c format from given folder structure.
Parameters
----------
quarks: str
Label of the quarks used in the sfcf input file. e.g. "quark quark"
for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is done automatically for this version
for version 0.0 this does NOT need to be given with the typical " - "
that is present in the output file;
this is done automatically for this version
noffset: int
Offset of the source (only relevant when wavefunctions are used)
wf: int
ID of wave function
wf2: int
ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
ID of the second wavefunction
(only relevant for boundary-to-boundary correlation functions)
im: bool
if True, read imaginary instead of real part of the correlation function.
if True, read imaginary instead of real part
of the correlation function.
b2b: bool
if True, read a time-dependent boundary-to-boundary correlation function
if True, read a time-dependent boundary-to-boundary
correlation function
single: bool
if True, read time-independent boundary-to-boundary correlation function
if True, read time-independent boundary-to-boundary
correlation function
names: list
Alternative labeling for replicas/ensembles. Has to have the appropriate length
Alternative labeling for replicas/ensembles.
Has to have the appropriate length
ens_name : str
replaces the name of the ensemble
version: str
version of SFCF, with which the measurement was done. if the compact output option (-c) was specified, append a c to the version (e.g. "1.0c")
version of SFCF, with which the measurement was done.
if the compact output option (-c) was specified,
append a "c" to the version (e.g. "1.0c")
if the append output option (-a) was specified,
append an "a" to the version
replica: list
list of replica to be read, default is all
files: list
list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
list of files to be read per replica, default is all.
for non-compact output format, hand the folders to be read here.
check_configs:
list of lists of supposed configs, e.g. [range(1,1000)] for one replicum with 1000 configs
list of lists of supposed configs, e.g. [range(1,1000)]
for one replicum with 1000 configs
TODO:
- what's going on with the files here?
"""
if kwargs.get('im'):
im = 1
@@ -63,15 +79,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
if "files" in kwargs:
files = kwargs.get("files")
#due to higher usage in current projects, compact file format is default
# due to higher usage in current projects,
# compact file format is default
compact = True
appended = False
#get version string
known_versions = ["0.0","1.0","2.0","1.0c","2.0c","1.0a","2.0a"]
# get version string
known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
if not version in known_versions:
if version not in known_versions:
raise Exception("This version is not known!")
#if the letter c is appended to the version, the compact file format is used (former read_sfcf_c)
# if the letter c is appended to the version,
# the compact file format is used (former read_sfcf_c)
if(version[-1] == "c"):
appended = False
compact = True
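# the suffix convention, summarised (the "a" branch sits in the
# lines elided by this hunk; behaviour inferred from the docstring):
#   "1.0"  -> compact=False, appended=False
#   "1.0c" -> compact=True,  appended=False
#   "1.0a" -> compact=False, appended=True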
@@ -103,15 +121,19 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
# New version, to cope with ids, etc.
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
if not appended:
replica = len(ls)
else:
replica = len([l.split(".")[-1] for l in ls])//len(set([l.split(".")[-1] for l in ls]))
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
replica = len([file.split(".")[-1] for file in ls])\
// len(set([file.split(".")[-1] for file in ls]))
print('Read', part, 'part of', name, 'from', prefix[:-1],
',', replica, 'replica')
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names)!=len(set(new_names)):
if len(new_names) != len(set(new_names)):
raise Exception("names are not unique!")
if len(new_names) != replica:
raise Exception("'names' does not have the required length", replica)
@@ -123,32 +145,36 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
for entry in ls:
try:
idx = entry.index('r')
except:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
except Exception:
raise Exception("Automatic recognition of replicum failed, \
please enter the key word 'names'.")
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
new_names.append(kwargs.get('ens_name') + '|' +
entry[idx:])
else:
new_names.append(entry[:idx] + '|' + entry[idx:])
else:
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.'+name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
for entry in ls:
myentry = entry[:-len(name)-1]
print(myentry)
# print(myentry)
try:
idx = myentry.index('r')
except:
raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
except Exception:
raise Exception("Automatic recognition of replicum failed, \
please enter the key word 'names'.")
if 'ens_name' in kwargs:
new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
new_names.append(kwargs.get('ens_name') + '|' +
myentry[idx:])
else:
new_names.append(myentry[:idx] + '|' + myentry[idx:])
#print(new_names)
# print(new_names)
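# illustration with a hypothetical entry name:
#   entry = "data_r1"
#   idx = entry.index('r')                # first 'r' marks the replicum tag
#   entry[:idx] + '|' + entry[idx:]       # -> 'data_|r1'
# names containing an earlier 'r' would split wrongly, which is
# why the 'names' keyword exists as a fallback.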
idl = []
if not appended:
for i, item in enumerate(ls):
@@ -157,24 +183,26 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
sub_ls = kwargs.get("files")
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
else:
for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
for (dirpath, dirnames, filenames) in \
os.walk(path + '/' + item):
if compact:
sub_ls.extend(filenames)
else:
sub_ls.extend(dirnames)
break
#print(sub_ls)
for exc in sub_ls:
# print(sub_ls)
for exc in sub_ls:
if compact:
if not fnmatch.fnmatch(exc, prefix + '*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
sub_ls.sort(key=lambda x:
int(re.findall(r'\d+', x)[-1]))
else:
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
#print(sub_ls)
# print(sub_ls)
rep_idl = []
no_cfg = len(sub_ls)
for cfg in sub_ls:
@@ -183,54 +211,73 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
rep_idl.append(int(cfg.split("n")[-1]))
else:
rep_idl.append(int(cfg[3:]))
except:
raise Exception("Couldn't parse idl from directroy, problem with file "+cfg)
except Exception:
raise Exception("Couldn't parse idl from directroy, \
problem with file "+cfg)
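# naming conventions relied on here: compact files end in
# "...n<idl>", non-compact directories are named "cfg<idl>"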
rep_idl.sort()
#maybe there is a better way to print the idls
# maybe there is a better way to print the idls
print(item, ':', no_cfg, ' configurations')
idl.append(rep_idl)
#here we have found all the files we need to look into.
# here we have found all the files we need to look into.
if i == 0:
#here, we want to find the place within the file, where the correlator we need is stored.
# here, we want to find the place within the file,
# where the correlator we need is stored.
if compact:
#to do so, the pattern needed is put together from the input values
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
# to do so, the pattern needed is put together
# from the input values
pattern = 'name ' + name + '\nquarks '\
+ quarks + '\noffset '\
+ str(noffset) + '\nwf '\
+ str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
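# for orientation, a compact-format header block this pattern
# is meant to match (values hypothetical, reconstructed from
# the pattern string built above):
#   name f_A
#   quarks lquark lquark
#   offset 0
#   wf 0
#   wf_2 0    (only present for b2b correlators)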
#and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
# and the file is parsed through to find the pattern
with open(path + '/' + item + '/' + sub_ls[0], 'r') \
as file:
content = file.read()
match = re.search(pattern, content)
if match:
#the start and end points of the correlator in question are extracted for later use in the other files
start_read = content.count('\n', 0, match.start()) + 5 + b2b
end_match = re.search(r'\n\s*\n', content[match.start():])
T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
# the start and end points of the correlator
# in question are extracted for later use in
# the other files
start_read = content.count('\n', 0, match.start())\
+ 5 + b2b
end_match = re.search(r'\n\s*\n',
content[match.start():])
T = content[match.start():]\
.count('\n', 0, end_match.start()) - 4 - b2b
assert T > 0
print(T, 'entries, starting to read in line', start_read)
print(T, 'entries, starting to read in line',
start_read)
else:
raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
raise Exception('Correlator with pattern\n'
+ pattern + '\nnot found.')
else:
#this part does the same as above, but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
# this part does the same as above,
# but for non-compactified versions of the files
with open(path + '/' + item + '/' + sub_ls[0] + '/'
+ name) as fp:
for k, line in enumerate(fp):
if version == "0.0":
#check if this is really the right file by matching a pattern similar to the one above
pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
#if b2b, a second wf is needed
# check if this is really the right file
# by matching a pattern similar to the one above
pattern = "# "+name+" : offset "+str(noffset)\
+ ", wf "+str(wf)
# if b2b, a second wf is needed
if b2b:
pattern+=", wf_2 "+str(wf2)
pattern += ", wf_2 "+str(wf2)
qs = quarks.split(" ")
pattern+=" : "+qs[0]+" - "+qs[1]
#print(pattern)
if read == 1 and not line.strip() and k > start + 1:
pattern += " : " + qs[0]+" - " + qs[1]
# print(pattern)
if read == 1 and not line.strip() \
and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if version == "0.0":
if pattern in line:
#print(line)
# print(line)
read = 1
start = k+1
else:
@@ -239,121 +286,133 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, version =
start = k + 7 + b2b
T -= b2b
print(str(T)+" entries found.")
#we found where the correlator that is to be read is in the files
#after preparing the datastructure the correlators get parsed into...
# we found where the correlator
# that is to be read is in the files
# after preparing the datastructure
# the correlators get parsed into...
deltas = []
for j in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(no_cfg))
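# resulting layout, for reference:
# deltas[t][replicum][config] -> one float per timeslice,
# replicum and configuration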
#... the actual parsing can start. we iterate through all measurement files in the path given...
# ...the actual parsing can start.
# we iterate through all measurement files in the path given...
if compact:
for cfg in range(no_cfg):
with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
lines = fp.readlines()
#check if the correlator is in fact printed completely
if(start_read + T>len(lines)):
raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
#and start to read the correlator.
#the range here is chosen like this, since this allows for implementing a security check for every read correlator later...
for k in range(start_read - 6,start_read + T):
# check if the correlator is in fact
# printed completely
if(start_read + T > len(lines)):
raise Exception("EOF before end of correlator data! \
Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" \
is corrupted?")
# and start to read the correlator.
# the range here is chosen like this,
# since this allows for implementing
# a security check for every read correlator later...
for k in range(start_read - 6, start_read + T):
if k == start_read - 5 - b2b:
if lines[k].strip() != 'name ' + name:
raise Exception('Wrong format', sub_ls[cfg])
raise Exception('Wrong format',
sub_ls[cfg])
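# the last two columns of a data line hold the real and
# imaginary part; 'im' selects the requested one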
if(k >= start_read and k < start_read + T):
floats = list(map(float, lines[k].split()))
deltas[k - start_read][i][cfg] = floats[-2:][im]
deltas[k - start_read][i][cfg] = \
floats[-2:][im]
else:
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
#since the non-compactified files are typically not so long, we can iterate over the whole file.
#here one can also implement the check from above.
with open(path + '/' + item + '/' + subitem
+ '/' + name) as fp:
# since the non-compactified files
# are typically not so long,
# we can iterate over the whole file.
# here one can also implement the check from above.
for k, line in enumerate(fp):
if(k >= start and k < start + T):
floats = list(map(float, line.split()))
if version == "0.0":
deltas[k-start][i][cnfg] = floats[im]
else:
deltas[k - start][i][cnfg] = floats[1 + im - single]
deltas[k - start][i][cnfg] = \
floats[1 + im - single]
else:
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.'+name):
ls = list(set(ls) - set([exc]))
ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
#print(ls)
pattern = 'name ' + name + '\nquarks ' + quarks + '\noffset ' + str(noffset) + '\nwf ' + str(wf)
# print(ls)
pattern = 'name ' + name + '\nquarks '\
+ quarks + '\noffset ' + str(noffset)\
+ '\nwf ' + str(wf)
if b2b:
pattern += '\nwf_2 ' + str(wf2)
for rep,file in enumerate(ls):
for rep, file in enumerate(ls):
rep_idl = []
with open(path + '/' + file, 'r') as fp:
content = fp.readlines()
data_starts = []
for l,line in enumerate(content):
for linenumber, line in enumerate(content):
if "[run]" in line:
data_starts.append(l)
if len(set([data_starts[i]-data_starts[i-1] for i in range(1,len(data_starts))])) > 1:
raise Exception ("Irregularities in file structure found, not all runs have the same output length")
#print(data_starts)
#first chunk of data
data_starts.append(linenumber)
if len(set([data_starts[i]-data_starts[i-1] for i in
range(1, len(data_starts))])) > 1:
raise Exception("Irregularities in file structure found,\
not all runs have the same output length")
# first chunk of data
chunk = content[:data_starts[1]]
for l,line in enumerate(chunk):
for linenumber, line in enumerate(chunk):
if line.startswith("gauge_name"):
gauge_line = l
#meta_data["gauge_name"] = (line.strip()).split("/")[-1]
gauge_line = linenumber
elif line.startswith("[correlator]"):
corr_line = l
corr_line = linenumber
found_pat = ""
for li in chunk[corr_line+1:corr_line+6+b2b]:
found_pat += li
if re.search(pattern,found_pat):
if re.search(pattern, found_pat):
start_read = corr_line+7+b2b
T=len(chunk)-1-start_read
T = len(chunk)-1-start_read
if rep == 0:
deltas = []
for t in range(T):
deltas.append([])
for t in range(T):
deltas[t].append(np.zeros(len(data_starts)))
#all other chunks should follow the same structure
# all other chunks should follow the same structure
for cnfg in range(len(data_starts)):
start = data_starts[cnfg]
stop = start+data_starts[1]
chunk = content[start:stop]
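# every chunk has the same length (checked above), so the line
# offsets found in the first chunk remain valid in this one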
#meta_data = {}
# meta_data = {}
try:
rep_idl.append(int(chunk[gauge_line].split("n")[-1]))
except:
raise Exception("Couldn't parse idl from directroy, problem with chunk around line "+gauge_line)
except Exception:
raise Exception("Couldn't parse idl from directroy, \
problem with chunk around line "+gauge_line)
found_pat = ""
for li in chunk[corr_line+1:corr_line+6+b2b]:
found_pat += li
if re.search(pattern,found_pat):
#print("found pattern")
for t,line in enumerate(chunk[start_read:start_read+T]):
if re.search(pattern, found_pat):
for t, line in \
enumerate(chunk[start_read:start_read+T]):
floats = list(map(float, line.split()))
deltas[t][rep][cnfg] = floats[-2:][im]
idl.append(rep_idl)
#print(new_names)
#print(deltas)
#print(idl)
if "check_configs" in kwargs:
print("Checking for missing configs...")
che = kwargs.get("check_configs")
if not (len(che) == len(idl)):
raise Exception("check_configs has to be the same length as replica!")
raise Exception("check_configs has to be the same length\
as replica!")
for r in range(len(idl)):
print("checking "+new_names[r])
utils.check_idl(idl[r], che[r])
print("Done")
result = []
for t in range(T):
result.append(Obs(deltas[t], new_names, idl = idl))
result.append(Obs(deltas[t], new_names, idl=idl))
return result
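
The returned list can be fed straight into further analysis; a sketch assuming pyerrors' Corr class and hypothetical input values:

from pyerrors.correlators import Corr
my_corr = Corr(read_sfcf("/data/sfcf_runs", "data", "f_A"))
my_corr.gamma_method()  # estimate the error on every timeslice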