implemented idl into sfcf-read method

2026-03-01 16:23:39 +01:00 · 2021-12-17 15:16:17 +01:00 · 2021-12-17 15:16:17 +01:00 · c5292f8342
commit c5292f8342
parent 5e5a9df404
2 changed files with 104 additions and 166 deletions
--- a/pyerrors/input/sfcf.py
+++ b/pyerrors/input/sfcf.py
@ -6,125 +6,41 @@ import fnmatch
 import re
 import numpy as np  # Thinly-wrapped numpy
 from ..obs import Obs
-
+from . import utils
 def read_sfcf_old(path, prefix, name, quarks, noffset = 0, wf=0, wf2=0, **kwargs):
    """Read sfcf format (from around 2012) from given folder structure.
    Keyword arguments
    -----------------
    im -- if True, read imaginary instead of real part of the correlation function.
    single -- if True, read a boundary-to-boundary correlation function with a single value
    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'
    b2b = 0
    if kwargs.get('b2b'):
        b2b = 1
    quarks = quarks.split(" ")
    read = 0
    T = 0
    start = 0
    ls = []
    for (dirpath, dirnames, filenames) in os.walk(path):
        ls.extend(dirnames)
        break
    if not ls:
        print('Error, directory not found')
        #sys.exit()
    for exc in ls:
        if fnmatch.fnmatch(exc, prefix + '*'):
            ls = list(set(ls) - set(exc))
    if len(ls) > 1:
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
    replica = len(ls)
    print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
        new_names = ls
    print(replica, 'replica')
    for i, item in enumerate(ls):
        print(item)
        sub_ls = []
        for (dirpath, dirnames, filenames) in os.walk(path+'/'+item):
            sub_ls.extend(dirnames)
            break
        for exc in sub_ls:
            if fnmatch.fnmatch(exc, 'cfg*'):
                sub_ls = list(set(sub_ls) - set(exc))
        sub_ls.sort(key=lambda x: int(x[3:]))
        no_cfg = len(sub_ls)
        print(no_cfg, 'configurations')
        if i == 0:
            with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
                for k, line in enumerate(fp):
                    #check if this is really the right file
                    pattern = "# "+name+" : offset "+str(noffset)+", wf "+"0"
                    #if b2b, a second wf is needed
                    if b2b:
                        pattern+=", wf_2 "+"0"
                    pattern+=" : "+quarks[0]+" - "+quarks[1]
                    if read == 1 and not line.strip() and k > start + 1:
                        break
                    if read == 1 and k >= start:
                        T += 1
                    if pattern in line:
                        #print(line)
                        read = 1
                        start = k+1
                print(str(T)+" entries found.")
            deltas = []
            for j in range(T):
                deltas.append([])
        sublength = len(sub_ls)
        for j in range(T):
            deltas[j].append(np.zeros(sublength))
        for cnfg, subitem in enumerate(sub_ls):
            with open(path + '/' + item + '/' + subitem + '/'+name) as fp:
                for k, line in enumerate(fp):
                    if(k >= start and k < start + T):
                        floats = list(map(float, line.split()))
                        deltas[k-start][i][cnfg] = floats[im]
    result = []
    for t in range(T):
        result.append(Obs(deltas[t], new_names))
    return result
 def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs):
    """Read sfcf c format from given folder structure.
    Parameters
    ----------
-    quarks -- Label of the quarks used in the sfcf input file
+    quarks: str
-    noffset -- Offset of the source (only relevant when wavefunctions are used)
+        Label of the quarks used in the sfcf input file. e.g. "quark quark"
-    wf -- ID of wave function
+        for version 0.0 this does NOT need to be given with the typical " - " that is present in the output file,
-    wf2 -- ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
+        this is done automatically for this version
-    im -- if True, read imaginary instead of real part of the correlation function.
+    noffset: int
-    b2b -- if True, read a time-dependent boundary-to-boundary correlation function
+        Offset of the source (only relevant when wavefunctions are used)
-    single -- if True, read time independent boundary to boundary correlation function
+    wf: int
-    names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
+        ID of wave function
    wf2: int
        ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
    im: bool
        if True, read imaginary instead of real part of the correlation function.
    b2b: bool
        if True, read a time-dependent boundary-to-boundary correlation function
    single: bool
        if True, read time independent boundary to boundary correlation function
    names: list
        Alternative labeling for replicas/ensembles. Has to have the appropriate length
    ens_name : str
        replaces the name of the ensemble
    version: str
        version of SFCF, with which the measurement was done. if the compact output option (-c) was spectified, append a c to the version (e.g. "1.0c")
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all. for non-conpact ouztput format, hand the folders to be read here.
    check_configs:
        list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
    """
    if kwargs.get('im'):
        im = 1
@ -142,8 +58,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
        else:
            b2b = 0
        single = 0
-
+    if "replica" in kwargs:
-    files = []
+        reps = kwargs.get("replica")
    if "files" in kwargs:
        files = kwargs.get("files")
@ -172,8 +88,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
    if not ls:
        raise Exception('Error, directory not found')
    # Exclude folders with different names
-    if len(files) != 0:
+    if "replica" in kwargs:
-        ls = files
+        ls = reps
    else:
        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*'):
@ -182,9 +98,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
        ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))  # New version, to cope with ids, etc.
    replica = len(ls)
    print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
-
+    idl = []
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names)!=len(set(new_names)):
            raise Exception("names are nor unique!")
        if len(new_names) != replica:
            raise Exception('Names does not have the required length', replica)
    else:
@ -194,13 +112,18 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
            try:
                idx = entry.index('r')
            except:
-                idx = len(entry)-2
+                raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
            if 'ens_name' in kwargs:
                new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
            else:
                new_names.append(entry[:idx] + '|' + entry[idx:])
    for i, item in enumerate(ls):
        sub_ls = []
        if "files" in kwargs:
            sub_ls = kwargs.get("files")
            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
        else:
            for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
                if compact:
                    sub_ls.extend(filenames)
@ -218,35 +141,36 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
                    if not fnmatch.fnmatch(exc, 'cfg*'):
                        sub_ls = list(set(sub_ls) - set([exc]))
                    sub_ls.sort(key=lambda x: int(x[3:]))
-        
+        #print(sub_ls)
-        if compact:
+        rep_idl = []
            first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
            last_cfg = len(sub_ls) + first_cfg - 1
            for cfg in range(1, len(sub_ls)):
                if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
                    last_cfg = cfg + first_cfg - 1
                    break
            no_cfg = last_cfg - first_cfg + 1
            print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
        else:
        no_cfg = len(sub_ls)
-            print(no_cfg, 'configurations')
+        for cfg in sub_ls:
-
+            try:
                if compact:
                    rep_idl.append(int(cfg.split("n")[-1]))
                else:
                    rep_idl.append(int(cfg[3:]))
            except:
                raise Exception("Couldn't parse idl from directroy, problem with file "+cfg)
        rep_idl.sort()
        #maybe there is a better way to print the idls
        print(item, ':', no_cfg, ' configurations')
        idl.append(rep_idl)
    #here we have found all the files we need to look into.
        if i == 0:
-            if compact:
+            #here, we want to find the place within the file, where the correlator we need is stored.
            if compact:
                #to do so, the pattern needed is put together from the input values
                pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
                if b2b:
                    pattern += '\nwf_2      ' + str(wf2)
-
+                #and the file is parsed through to find the pattern
                with open(path + '/' + item + '/' + sub_ls[0], 'r') as file:
                    content = file.read()
                    match = re.search(pattern, content)
                    if match:
                        #the start and end point of the correlator in quaetion is extracted for later use in the other files
                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
                        end_match = re.search(r'\n\s*\n', content[match.start():])
                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
@ -255,11 +179,11 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
                    else:
                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
            else:
-                #print(path + '/' + item + '/')# + sub_ls[0] + '/' + name)
+                #this part does the same as above, but for non-compactified versions of the files
                with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
                    for k, line in enumerate(fp):
                        if version == "0.0":
-                            #check if this is really the right file
+                            #check if this is really the right file by matchin pattern similar to above
                            pattern = "# "+name+" : offset "+str(noffset)+", wf "+str(wf)
                            #if b2b, a second wf is needed
                            if b2b:
@ -284,6 +208,7 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
                                T -= b2b
                    print(str(T)+" entries found.")
            #we found where the correlator that is to be read is in the files
            #after preparing the datastructure the correlators get parsed into...
            deltas = []
            for j in range(T):
                deltas.append([])
@ -291,12 +216,16 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
        sublength = no_cfg
        for j in range(T):
            deltas[j].append(np.zeros(sublength))
        #... the actual parsing can start. we iterate through all measurement files in the path given...
        if compact:
            for cfg in range(no_cfg):
                with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                    lines = fp.readlines()
                    #check, if the correlator is in fact printed completely
                    if(start_read + T>len(lines)):
                        raise Exception("EOF before end of correlator data! Maybe "+path + '/' + item + '/' + sub_ls[cfg]+" is corrupted?")
                    #and start to read the correlator.
                    #the range here is chosen like this, since this allows for implementing a security check for every read correlator later...
                    for k in range(start_read - 6,start_read + T):
                        if k == start_read - 5 - b2b:
                            if lines[k].strip() != 'name      ' + name:
@ -307,6 +236,8 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
        else:
            for cnfg, subitem in enumerate(sub_ls):
                with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
                    #since the non-compatified files are typically not so long, we can iterate over the whole file.
                    #here one can also implement the chekc from above.
                    for k, line in enumerate(fp):
                        if(k >= start and k < start + T):
                            floats = list(map(float, line.split()))
@ -315,9 +246,17 @@ def read_sfcf(path, prefix, name, quarks='.*', noffset=0, wf=0, wf2=0, **kwargs)
                            else:
                                deltas[k - start][i][cnfg] = floats[1 + im - single]
-
+    if "check_configs" in kwargs:
        print("Chekcing for missing configs...")
        che = kwargs.get("check_configs")
        if not (len(che) == len(idl)):
            raise Exception("check_configs has to be the same length as replica!")
        for r in range(len(idl)):
            print("checking "+new_names[r])
            utils.check_idl(idl[r], che[r])
        print("Done")
    result = []
    for t in range(T):
-        result.append(Obs(deltas[t], new_names))
+        result.append(Obs(deltas[t], new_names, idl = idl))
    return result
--- a/pyerrors/input/utils.py
+++ b/pyerrors/input/utils.py
@ -1,14 +1,13 @@
-import fnmatch
+"""Utilities for the input"""
-def check_missing(idl,che):
+def check_idl(idl,che):
    missing = []
-    for ind in che:
+    for c in che:
-            if not ind in idl:
+        if not c in idl:
-                missing.append(ind)
+            missing.append(c)
-    if(len(missing) == 0):
+    #print missing such that it can directly be parsed to slurm terminal
-        print("There are no measurements missing.")
+    if not (len(missing) == 0):
-    else:
+        print(len(missing),"configs missing")
        print(len(missing),"measurements missing")
        miss_str = str(missing[0])
        for i in missing[1:]:
            miss_str += ","+str(i)