pyerrors.input.sfcf

import os
import fnmatch
import re
import numpy as np  # Thinly-wrapped numpy
from ..obs import Obs
from . import utils


def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", **kwargs):
    """Read sfcf c format from given folder structure.

    Parameters
    ----------
    quarks: str
        Label of the quarks used in the sfcf input file, e.g. "quark quark".
        For version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file;
        it is added automatically for this version.
    noffset: int
        Offset of the source (only relevant when wavefunctions are used)
    wf: int
        ID of the wave function
    wf2: int
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions)
    im: bool
        if True, read the imaginary instead of the real part
        of the correlation function.
    corr_type : str
        choose between bi (boundary - inner, default), bib (boundary - inner - boundary)
        and bb (boundary - boundary) correlator types
    names : list
        Alternative labeling for replicas/ensembles.
        Has to have the appropriate length.
    ens_name : str
        replaces the name of the ensemble
    version: str
        version of SFCF with which the measurement was done.
        If the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c");
        if the append output option (-a) was specified,
        append an "a" to the version.
    cfg_separator : str
        String that separates the ensemble identifier from the configuration number (default 'n').
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all.
        For non-compact output format, hand the folders to be read here.
    check_configs: list
        list of lists of expected configs, e.g. [range(1, 1000)]
        for one replicum with 1000 configs
    """
    if kwargs.get('im'):
        im = 1
        part = 'imaginary'
    else:
        im = 0
        part = 'real'
    if "replica" in kwargs:
        reps = kwargs.get("replica")
    if corr_type == 'bb':
        b2b = True
        single = True
    elif corr_type == 'bib':
        b2b = True
        single = False
    else:
        b2b = False
        single = False
    compact = True
    appended = False
    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]

    if version not in known_versions:
        raise Exception("This version is not known!")
    if version[-1] == "c":
        appended = False
        compact = True
        version = version[:-1]
    elif version[-1] == "a":
        appended = True
        compact = False
        version = version[:-1]
    else:
        compact = False
        appended = False
    read = 0
    T = 0
    start = 0
    ls = []
    if "replica" in kwargs:
        ls = reps
    else:
        for (dirpath, dirnames, filenames) in os.walk(path):
            if not appended:
                ls.extend(dirnames)
            else:
                ls.extend(filenames)
            break
        if not ls:
            raise Exception('Error, directory not found')
        # Exclude folders with different names
        for exc in ls:
            if not fnmatch.fnmatch(exc, prefix + '*'):
                ls = list(set(ls) - set([exc]))

    if not appended:
        if len(ls) > 1:
            # New version, to cope with ids, etc.
            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
        replica = len(ls)
    else:
        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
    print('Read', part, 'part of', name, 'from', prefix[:-1],
          ',', replica, 'replica')
    if 'names' in kwargs:
        new_names = kwargs.get('names')
        if len(new_names) != len(set(new_names)):
            raise Exception("names are not unique!")
        if len(new_names) != replica:
            raise Exception("'names' does not have the required length", replica)
    else:
        new_names = []
        if not appended:
            for entry in ls:
                try:
                    idx = entry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, please enter the keyword 'names'.")

                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
                else:
                    new_names.append(entry[:idx] + '|' + entry[idx:])
        else:
            for exc in ls:
                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                    ls = list(set(ls) - set([exc]))
            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            for entry in ls:
                myentry = entry[:-len(name) - 1]
                try:
                    idx = myentry.index('r')
                except Exception:
                    raise Exception("Automatic recognition of replicum failed, please enter the keyword 'names'.")

                if 'ens_name' in kwargs:
                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
                else:
                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
    idl = []
    if not appended:
        for i, item in enumerate(ls):
            sub_ls = []
            if "files" in kwargs:
                sub_ls = kwargs.get("files")
                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
            else:
                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
                    if compact:
                        sub_ls.extend(filenames)
                    else:
                        sub_ls.extend(dirnames)
                    break
                if compact:
                    for exc in sub_ls:
                        if not fnmatch.fnmatch(exc, prefix + '*'):
                            sub_ls = list(set(sub_ls) - set([exc]))
                    sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
                else:
                    for exc in sub_ls:
                        if not fnmatch.fnmatch(exc, 'cfg*'):
                            sub_ls = list(set(sub_ls) - set([exc]))
                    sub_ls.sort(key=lambda x: int(x[3:]))
            rep_idl = []
            no_cfg = len(sub_ls)
            for cfg in sub_ls:
                try:
                    if compact:
                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
                    else:
                        rep_idl.append(int(cfg[3:]))
                except Exception:
                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
            rep_idl.sort()
            # maybe there is a better way to print the idls
            print(item, ':', no_cfg, ' configurations')
            idl.append(rep_idl)
            # here we have found all the files we need to look into.
            if i == 0:
                # here, we want to find the place within the file
                # where the correlator we need is stored.
                # to do so, the pattern needed is put together
                # from the input values
                if version == "0.0":
                    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
                    # if b2b, a second wf is needed
                    if b2b:
                        pattern += ", wf_2 " + str(wf2)
                    qs = quarks.split(" ")
                    pattern += " : " + qs[0] + " - " + qs[1]
                    file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
                    for k, line in enumerate(file):
                        if read == 1 and not line.strip() and k > start + 1:
                            break
                        if read == 1 and k >= start:
                            T += 1
                        if pattern in line:
                            read = 1
                            start = k + 1
                    print(str(T) + " entries found.")
                    file.close()
                else:
                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
                    if b2b:
                        pattern += '\nwf_2      ' + str(wf2)
                    # and the file is parsed through to find the pattern
                    if compact:
                        file = open(path + '/' + item + '/' + sub_ls[0], "r")
                    else:
                        # for non-compactified versions of the files
                        file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")

                    content = file.read()
                    match = re.search(pattern, content)
                    if match:
                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
                        end_match = re.search(r'\n\s*\n', content[match.start():])
                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
                        assert T > 0
                        print(T, 'entries, starting to read in line', start_read)
                        file.close()
                    else:
                        file.close()
                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')

                # we found where the correlator
                # that is to be read is in the files.
                # after preparing the data structure
                # the correlators get parsed into it...
                deltas = []
                for j in range(T):
                    deltas.append([])

            for t in range(T):
                deltas[t].append(np.zeros(no_cfg))
            if compact:
                for cfg in range(no_cfg):
                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
                        lines = fp.readlines()
                        # check if the correlator is in fact
                        # printed completely
                        if start_read + T > len(lines):
                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
                        # and start to read the correlator.
                        # the range here is chosen like this,
                        # since this allows for implementing
                        # a security check for every read correlator later...
                        for k in range(start_read - 6, start_read + T):
                            if k == start_read - 5 - b2b:
                                if lines[k].strip() != 'name      ' + name:
                                    raise Exception('Wrong format', sub_ls[cfg])
                            if k >= start_read and k < start_read + T:
                                floats = list(map(float, lines[k].split()))
                                deltas[k - start_read][i][cfg] = floats[-2:][im]
            else:
                for cnfg, subitem in enumerate(sub_ls):
                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
                        # since the non-compactified files
                        # are typically not so long,
                        # we can iterate over the whole file.
                        # here one can also implement the check from above.
                        for k, line in enumerate(fp):
                            if k >= start_read and k < start_read + T:
                                floats = list(map(float, line.split()))
                                if version == "0.0":
                                    deltas[k - start][i][cnfg] = floats[im - single]
                                else:
                                    deltas[k - start_read][i][cnfg] = floats[1 + im - single]

    else:
        if "files" in kwargs:
            ls = kwargs.get("files")
        else:
            for exc in ls:
                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
                    ls = list(set(ls) - set([exc]))
                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
        if b2b:
            pattern += '\nwf_2      ' + str(wf2)
        for rep, file in enumerate(ls):
            rep_idl = []
            with open(path + '/' + file, 'r') as fp:
                content = fp.readlines()
                data_starts = []
                for linenumber, line in enumerate(content):
                    if "[run]" in line:
                        data_starts.append(linenumber)
                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
                chunk = content[:data_starts[1]]
                for linenumber, line in enumerate(chunk):
                    if line.startswith("gauge_name"):
                        gauge_line = linenumber
                    elif line.startswith("[correlator]"):
                        corr_line = linenumber
                        found_pat = ""
                        for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
                            found_pat += li
                        if re.search(pattern, found_pat):
                            start_read = corr_line + 7 + b2b
                            break
                endline = corr_line + 6 + b2b
                while not chunk[endline] == "\n":
                    endline += 1
                T = endline - start_read
                if rep == 0:
                    deltas = []
                    for t in range(T):
                        deltas.append([])
                for t in range(T):
                    deltas[t].append(np.zeros(len(data_starts)))
                # all other chunks should follow the same structure
                for cnfg in range(len(data_starts)):
                    start = data_starts[cnfg]
                    stop = start + data_starts[1]
                    chunk = content[start:stop]
                    try:
                        rep_idl.append(int(chunk[gauge_line].split(cfg_separator)[-1]))
                    except Exception:
                        raise Exception("Couldn't parse idl from file, problem with chunk around line ", gauge_line)

                    found_pat = ""
                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
                        found_pat += li
                    if re.search(pattern, found_pat):
                        for t, line in enumerate(chunk[start_read:start_read + T]):
                            floats = list(map(float, line.split()))
                            deltas[t][rep][cnfg] = floats[im + 1 - single]
            idl.append(rep_idl)

    if "check_configs" in kwargs:
        print("Checking for missing configs...")
        che = kwargs.get("check_configs")
        if not (len(che) == len(idl)):
            raise Exception("check_configs has to be the same length as replica!")
        for r in range(len(idl)):
            print("checking " + new_names[r])
            utils.check_idl(idl[r], che[r])
        print("Done")
    result = []
    for t in range(T):
        result.append(Obs(deltas[t], new_names, idl=idl))
    return result
def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', **kwargs)

Read sfcf c format from given folder structure.
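
The expected on-disk layout, as a sketch inferred from the matching logic in the source above; the prefix "data" and correlator name "f_A" are hypothetical placeholders, and the default cfg_separator 'n' is assumed:

    # compact format (version "1.0c"/"2.0c"): one folder per replicum
    # (matching prefix*), one output file per configuration
    path/data_r0/data_r0n1
    path/data_r0/data_r0n2
    path/data_r1/data_r1n1
    ...
    # non-compact format (version "0.0"/"1.0"/"2.0"): one cfg<number>
    # folder per configuration, each holding one file per correlator name
    path/data_r0/cfg1/f_A
    path/data_r0/cfg2/f_A
    ...
    # appended format (version "1.0a"/"2.0a"): one file per replicum and
    # correlator, with all configurations appended
    path/data_r0.f_A
    path/data_r1.f_A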

Parameters
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to include the typical " - " that is present in the output file; it is added automatically for this version.
  • noffset (int): Offset of the source (only relevant when wavefunctions are used).
  • wf (int): ID of the wave function.
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions).
  • im (bool): if True, read the imaginary instead of the real part of the correlation function.
  • corr_type (str): choose between bi (boundary - inner, default), bib (boundary - inner - boundary) and bb (boundary - boundary) correlator types.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length.
  • ens_name (str): replaces the name of the ensemble.
  • version (str): version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all.
  • files (list): list of files to be read per replica, default is all. For non-compact output format, hand the folders to be read here.
  • check_configs (list): list of lists of expected configs, e.g. [range(1, 1000)] for one replicum with 1000 configs.
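
A minimal usage sketch, assuming a compact-format measurement laid out as in the folder sketch above; the path, prefix, correlator name and quark labels are hypothetical placeholders:

    from pyerrors.input.sfcf import read_sfcf

    # Read the real part of the correlator "f_A" from a hypothetical
    # measurement stored under ./data in compact format ("1.0c").
    f_A = read_sfcf("./data", "data", "f_A",
                    quarks="lquark lquark",  # placeholder quark labels
                    noffset=0, wf=0,
                    version="1.0c",
                    check_configs=[range(1, 501), range(1, 501)])  # optional: two replica, configs 1..500 each

    # The result is a list of Obs, one per timeslice t = 0, ..., T - 1.
    print(len(f_A), f_A[0])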