pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from . import utils
  7
  8
  9def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", **kwargs):
 10    """Read sfcf c format from given folder structure.
 11
 12    Parameters
 13    ----------
 14    path : str
 15        Path to the sfcf files.
 16    prefix : str
 17        Prefix of the sfcf files.
 18    name : str
 19        Name of the correlation function to read.
 20    quarks : str
 21        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 22        for version 0.0 this does NOT need to be given with the typical " - "
 23        that is present in the output file,
 24        this is done automatically for this version
 25    corr_type : str
 26        Type of correlation function to read. Can be
 27        - 'bi' for boundary-inner
 28        - 'bb' for boundary-boundary
 29        - 'bib' for boundary-inner-boundary
 30    noffset : int
 31        Offset of the source (only relevant when wavefunctions are used)
 32    wf : int
 33        ID of wave function
 34    wf2 : int
 35        ID of the second wavefunction
 36        (only relevant for boundary-to-boundary correlation functions)
 37    im : bool
 38        if True, read imaginary instead of real part
 39        of the correlation function.
 40    names : list
 41        Alternative labeling for replicas/ensembles.
 42        Has to have the appropriate length
 43    ens_name : str
 44        replaces the name of the ensemble
 45    version: str
 46        version of SFCF, with which the measurement was done.
 47        if the compact output option (-c) was specified,
 48        append a "c" to the version (e.g. "1.0c")
 49        if the append output option (-a) was specified,
 50        append an "a" to the version
 51    cfg_separator : str
 52        String that separates the ensemble identifier from the configuration number (default 'n').
 53    replica: list
 54        list of replica to be read, default is all
 55    files: list
 56        list of files to be read per replica, default is all.
 57        for non-compact output format, hand the folders to be read here.
 58    check_configs: list[list[int]]
 59        list of list of supposed configs, eg. [range(1,1000)]
 60        for one replicum with 1000 configs
 61
 62    Returns
 63    -------
 64    result: list[Obs]
 65        list of Observables with length T, observable per timeslice.
 66        bb-type correlators have length 1.
 67    """
 68    if kwargs.get('im'):
 69        im = 1
 70        part = 'imaginary'
 71    else:
 72        im = 0
 73        part = 'real'
 74    if "replica" in kwargs:
 75        reps = kwargs.get("replica")
 76    if corr_type == 'bb':
 77        b2b = True
 78        single = True
 79    elif corr_type == 'bib':
 80        b2b = True
 81        single = False
 82    else:
 83        b2b = False
 84        single = False
 85    compact = True
 86    appended = False
 87    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
 88
 89    if version not in known_versions:
 90        raise Exception("This version is not known!")
 91    if (version[-1] == "c"):
 92        appended = False
 93        compact = True
 94        version = version[:-1]
 95    elif (version[-1] == "a"):
 96        appended = True
 97        compact = False
 98        version = version[:-1]
 99    else:
100        compact = False
101        appended = False
102    read = 0
103    T = 0
104    start = 0
105    ls = []
106    if "replica" in kwargs:
107        ls = reps
108    else:
109        for (dirpath, dirnames, filenames) in os.walk(path):
110            if not appended:
111                ls.extend(dirnames)
112            else:
113                ls.extend(filenames)
114            break
115        if not ls:
116            raise Exception('Error, directory not found')
117        # Exclude folders with different names
118        for exc in ls:
119            if not fnmatch.fnmatch(exc, prefix + '*'):
120                ls = list(set(ls) - set([exc]))
121
122    if not appended:
123        if len(ls) > 1:
124            # New version, to cope with ids, etc.
125            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
126        replica = len(ls)
127    else:
128        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
129    print('Read', part, 'part of', name, 'from', prefix[:-1],
130          ',', replica, 'replica')
131    if 'names' in kwargs:
132        new_names = kwargs.get('names')
133        if len(new_names) != len(set(new_names)):
134            raise Exception("names are not unique!")
135        if len(new_names) != replica:
136            raise Exception('Names does not have the required length', replica)
137    else:
138        new_names = []
139        if not appended:
140            for entry in ls:
141                try:
142                    idx = entry.index('r')
143                except Exception:
144                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
145
146                if 'ens_name' in kwargs:
147                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
148                else:
149                    new_names.append(entry[:idx] + '|' + entry[idx:])
150        else:
151
152            for exc in ls:
153                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
154                    ls = list(set(ls) - set([exc]))
155            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
156            for entry in ls:
157                myentry = entry[:-len(name) - 1]
158                try:
159                    idx = myentry.index('r')
160                except Exception:
161                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
162
163                if 'ens_name' in kwargs:
164                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
165                else:
166                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
167    idl = []
168    if not appended:
169        for i, item in enumerate(ls):
170            sub_ls = []
171            if "files" in kwargs:
172                sub_ls = kwargs.get("files")
173                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
174            else:
175                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
176                    if compact:
177                        sub_ls.extend(filenames)
178                    else:
179                        sub_ls.extend(dirnames)
180                    break
181                if compact:
182                    for exc in sub_ls:
183                        if not fnmatch.fnmatch(exc, prefix + '*'):
184                            sub_ls = list(set(sub_ls) - set([exc]))
185                    sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
186                else:
187                    for exc in sub_ls:
188                        if not fnmatch.fnmatch(exc, 'cfg*'):
189                            sub_ls = list(set(sub_ls) - set([exc]))
190                    sub_ls.sort(key=lambda x: int(x[3:]))
191            rep_idl = []
192            no_cfg = len(sub_ls)
193            for cfg in sub_ls:
194                try:
195                    if compact:
196                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
197                    else:
198                        rep_idl.append(int(cfg[3:]))
199                except Exception:
200                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
201            rep_idl.sort()
202            # maybe there is a better way to print the idls
203            print(item, ':', no_cfg, ' configurations')
204            idl.append(rep_idl)
205            # here we have found all the files we need to look into.
206            if i == 0:
207                # here, we want to find the place within the file,
208                # where the correlator we need is stored.
209                # to do so, the pattern needed is put together
210                # from the input values
211                if version == "0.0":
212                    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
213                    # if b2b, a second wf is needed
214                    if b2b:
215                        pattern += ", wf_2 " + str(wf2)
216                    qs = quarks.split(" ")
217                    pattern += " : " + qs[0] + " - " + qs[1]
218                    file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
219                    for k, line in enumerate(file):
220                        if read == 1 and not line.strip() and k > start + 1:
221                            break
222                        if read == 1 and k >= start:
223                            T += 1
224                        if pattern in line:
225                            read = 1
226                            start = k + 1
227                    print(str(T) + " entries found.")
228                    file.close()
229                else:
230                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
231                    if b2b:
232                        pattern += '\nwf_2      ' + str(wf2)
233                    # and the file is parsed through to find the pattern
234                    if compact:
235                        file = open(path + '/' + item + '/' + sub_ls[0], "r")
236                    else:
237                        # for non-compactified versions of the files
238                        file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
239
240                    content = file.read()
241                    match = re.search(pattern, content)
242                    if match:
243                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
244                        end_match = re.search(r'\n\s*\n', content[match.start():])
245                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
246                        assert T > 0
247                        print(T, 'entries, starting to read in line', start_read)
248                        file.close()
249                    else:
250                        file.close()
251                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
252
253                # we found where the correlator
254                # that is to be read is in the files
255                # after preparing the datastructure
256                # the correlators get parsed into...
257                deltas = []
258                for j in range(T):
259                    deltas.append([])
260
261            for t in range(T):
262                deltas[t].append(np.zeros(no_cfg))
263            if compact:
264                for cfg in range(no_cfg):
265                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
266                        lines = fp.readlines()
267                        # check, if the correlator is in fact
268                        # printed completely
269                        if (start_read + T > len(lines)):
270                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
271                        # and start to read the correlator.
272                        # the range here is chosen like this,
273                        # since this allows for implementing
274                        # a security check for every read correlator later...
275                        for k in range(start_read - 6, start_read + T):
276                            if k == start_read - 5 - b2b:
277                                if lines[k].strip() != 'name      ' + name:
278                                    raise Exception('Wrong format', sub_ls[cfg])
279                            if (k >= start_read and k < start_read + T):
280                                floats = list(map(float, lines[k].split()))
281                                deltas[k - start_read][i][cfg] = floats[-2:][im]
282            else:
283                for cnfg, subitem in enumerate(sub_ls):
284                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
285                        # since the non-compatified files
286                        # are typically not so long,
287                        # we can iterate over the whole file.
288                        # here one can also implement the chekc from above.
289                        for k, line in enumerate(fp):
290                            if (k >= start_read and k < start_read + T):
291                                floats = list(map(float, line.split()))
292                                if version == "0.0":
293                                    deltas[k - start][i][cnfg] = floats[im - single]
294                                else:
295                                    deltas[k - start_read][i][cnfg] = floats[1 + im - single]
296
297    else:
298        if "files" in kwargs:
299            ls = kwargs.get("files")
300        else:
301            for exc in ls:
302                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
303                    ls = list(set(ls) - set([exc]))
304                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
305        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
306        if b2b:
307            pattern += '\nwf_2      ' + str(wf2)
308        for rep, file in enumerate(ls):
309            rep_idl = []
310            with open(path + '/' + file, 'r') as fp:
311                content = fp.readlines()
312                data_starts = []
313                for linenumber, line in enumerate(content):
314                    if "[run]" in line:
315                        data_starts.append(linenumber)
316                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
317                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
318                chunk = content[:data_starts[1]]
319                for linenumber, line in enumerate(chunk):
320                    if line.startswith("gauge_name"):
321                        gauge_line = linenumber
322                    elif line.startswith("[correlator]"):
323                        corr_line = linenumber
324                        found_pat = ""
325                        for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
326                            found_pat += li
327                        if re.search(pattern, found_pat):
328                            start_read = corr_line + 7 + b2b
329                            break
330                endline = corr_line + 6 + b2b
331                while not chunk[endline] == "\n":
332                    endline += 1
333                T = endline - start_read
334                if rep == 0:
335                    deltas = []
336                    for t in range(T):
337                        deltas.append([])
338                for t in range(T):
339                    deltas[t].append(np.zeros(len(data_starts)))
340                # all other chunks should follow the same structure
341                for cnfg in range(len(data_starts)):
342                    start = data_starts[cnfg]
343                    stop = start + data_starts[1]
344                    chunk = content[start:stop]
345                    try:
346                        rep_idl.append(int(chunk[gauge_line].split(cfg_separator)[-1]))
347                    except Exception:
348                        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
349
350                    found_pat = ""
351                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
352                        found_pat += li
353                    if re.search(pattern, found_pat):
354                        for t, line in enumerate(chunk[start_read:start_read + T]):
355                            floats = list(map(float, line.split()))
356                            deltas[t][rep][cnfg] = floats[im + 1 - single]
357            idl.append(rep_idl)
358
359    if "check_configs" in kwargs:
360        print("Checking for missing configs...")
361        che = kwargs.get("check_configs")
362        if not (len(che) == len(idl)):
363            raise Exception("check_configs has to be the same length as replica!")
364        for r in range(len(idl)):
365            print("checking " + new_names[r])
366            utils.check_idl(idl[r], che[r])
367        print("Done")
368    result = []
369    for t in range(T):
370        result.append(Obs(deltas[t], new_names, idl=idl))
371    return result
def read_sfcf( path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', **kwargs):
 10def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", **kwargs):
 11    """Read sfcf c format from given folder structure.
 12
 13    Parameters
 14    ----------
 15    path : str
 16        Path to the sfcf files.
 17    prefix : str
 18        Prefix of the sfcf files.
 19    name : str
 20        Name of the correlation function to read.
 21    quarks : str
 22        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 23        for version 0.0 this does NOT need to be given with the typical " - "
 24        that is present in the output file,
 25        this is done automatically for this version
 26    corr_type : str
 27        Type of correlation function to read. Can be
 28        - 'bi' for boundary-inner
 29        - 'bb' for boundary-boundary
 30        - 'bib' for boundary-inner-boundary
 31    noffset : int
 32        Offset of the source (only relevant when wavefunctions are used)
 33    wf : int
 34        ID of wave function
 35    wf2 : int
 36        ID of the second wavefunction
 37        (only relevant for boundary-to-boundary correlation functions)
 38    im : bool
 39        if True, read imaginary instead of real part
 40        of the correlation function.
 41    names : list
 42        Alternative labeling for replicas/ensembles.
 43        Has to have the appropriate length
 44    ens_name : str
 45        replaces the name of the ensemble
 46    version: str
 47        version of SFCF, with which the measurement was done.
 48        if the compact output option (-c) was specified,
 49        append a "c" to the version (e.g. "1.0c")
 50        if the append output option (-a) was specified,
 51        append an "a" to the version
 52    cfg_separator : str
 53        String that separates the ensemble identifier from the configuration number (default 'n').
 54    replica: list
 55        list of replica to be read, default is all
 56    files: list
 57        list of files to be read per replica, default is all.
 58        for non-compact output format, hand the folders to be read here.
 59    check_configs: list[list[int]]
 60        list of list of supposed configs, eg. [range(1,1000)]
 61        for one replicum with 1000 configs
 62
 63    Returns
 64    -------
 65    result: list[Obs]
 66        list of Observables with length T, observable per timeslice.
 67        bb-type correlators have length 1.
 68    """
 69    if kwargs.get('im'):
 70        im = 1
 71        part = 'imaginary'
 72    else:
 73        im = 0
 74        part = 'real'
 75    if "replica" in kwargs:
 76        reps = kwargs.get("replica")
 77    if corr_type == 'bb':
 78        b2b = True
 79        single = True
 80    elif corr_type == 'bib':
 81        b2b = True
 82        single = False
 83    else:
 84        b2b = False
 85        single = False
 86    compact = True
 87    appended = False
 88    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
 89
 90    if version not in known_versions:
 91        raise Exception("This version is not known!")
 92    if (version[-1] == "c"):
 93        appended = False
 94        compact = True
 95        version = version[:-1]
 96    elif (version[-1] == "a"):
 97        appended = True
 98        compact = False
 99        version = version[:-1]
100    else:
101        compact = False
102        appended = False
103    read = 0
104    T = 0
105    start = 0
106    ls = []
107    if "replica" in kwargs:
108        ls = reps
109    else:
110        for (dirpath, dirnames, filenames) in os.walk(path):
111            if not appended:
112                ls.extend(dirnames)
113            else:
114                ls.extend(filenames)
115            break
116        if not ls:
117            raise Exception('Error, directory not found')
118        # Exclude folders with different names
119        for exc in ls:
120            if not fnmatch.fnmatch(exc, prefix + '*'):
121                ls = list(set(ls) - set([exc]))
122
123    if not appended:
124        if len(ls) > 1:
125            # New version, to cope with ids, etc.
126            ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
127        replica = len(ls)
128    else:
129        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
130    print('Read', part, 'part of', name, 'from', prefix[:-1],
131          ',', replica, 'replica')
132    if 'names' in kwargs:
133        new_names = kwargs.get('names')
134        if len(new_names) != len(set(new_names)):
135            raise Exception("names are not unique!")
136        if len(new_names) != replica:
137            raise Exception('Names does not have the required length', replica)
138    else:
139        new_names = []
140        if not appended:
141            for entry in ls:
142                try:
143                    idx = entry.index('r')
144                except Exception:
145                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
146
147                if 'ens_name' in kwargs:
148                    new_names.append(kwargs.get('ens_name') + '|' + entry[idx:])
149                else:
150                    new_names.append(entry[:idx] + '|' + entry[idx:])
151        else:
152
153            for exc in ls:
154                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
155                    ls = list(set(ls) - set([exc]))
156            ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
157            for entry in ls:
158                myentry = entry[:-len(name) - 1]
159                try:
160                    idx = myentry.index('r')
161                except Exception:
162                    raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
163
164                if 'ens_name' in kwargs:
165                    new_names.append(kwargs.get('ens_name') + '|' + myentry[idx:])
166                else:
167                    new_names.append(myentry[:idx] + '|' + myentry[idx:])
168    idl = []
169    if not appended:
170        for i, item in enumerate(ls):
171            sub_ls = []
172            if "files" in kwargs:
173                sub_ls = kwargs.get("files")
174                sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
175            else:
176                for (dirpath, dirnames, filenames) in os.walk(path + '/' + item):
177                    if compact:
178                        sub_ls.extend(filenames)
179                    else:
180                        sub_ls.extend(dirnames)
181                    break
182                if compact:
183                    for exc in sub_ls:
184                        if not fnmatch.fnmatch(exc, prefix + '*'):
185                            sub_ls = list(set(sub_ls) - set([exc]))
186                    sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
187                else:
188                    for exc in sub_ls:
189                        if not fnmatch.fnmatch(exc, 'cfg*'):
190                            sub_ls = list(set(sub_ls) - set([exc]))
191                    sub_ls.sort(key=lambda x: int(x[3:]))
192            rep_idl = []
193            no_cfg = len(sub_ls)
194            for cfg in sub_ls:
195                try:
196                    if compact:
197                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
198                    else:
199                        rep_idl.append(int(cfg[3:]))
200                except Exception:
201                    raise Exception("Couldn't parse idl from directroy, problem with file " + cfg)
202            rep_idl.sort()
203            # maybe there is a better way to print the idls
204            print(item, ':', no_cfg, ' configurations')
205            idl.append(rep_idl)
206            # here we have found all the files we need to look into.
207            if i == 0:
208                # here, we want to find the place within the file,
209                # where the correlator we need is stored.
210                # to do so, the pattern needed is put together
211                # from the input values
212                if version == "0.0":
213                    pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
214                    # if b2b, a second wf is needed
215                    if b2b:
216                        pattern += ", wf_2 " + str(wf2)
217                    qs = quarks.split(" ")
218                    pattern += " : " + qs[0] + " - " + qs[1]
219                    file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
220                    for k, line in enumerate(file):
221                        if read == 1 and not line.strip() and k > start + 1:
222                            break
223                        if read == 1 and k >= start:
224                            T += 1
225                        if pattern in line:
226                            read = 1
227                            start = k + 1
228                    print(str(T) + " entries found.")
229                    file.close()
230                else:
231                    pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
232                    if b2b:
233                        pattern += '\nwf_2      ' + str(wf2)
234                    # and the file is parsed through to find the pattern
235                    if compact:
236                        file = open(path + '/' + item + '/' + sub_ls[0], "r")
237                    else:
238                        # for non-compactified versions of the files
239                        file = open(path + '/' + item + '/' + sub_ls[0] + '/' + name, "r")
240
241                    content = file.read()
242                    match = re.search(pattern, content)
243                    if match:
244                        start_read = content.count('\n', 0, match.start()) + 5 + b2b
245                        end_match = re.search(r'\n\s*\n', content[match.start():])
246                        T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
247                        assert T > 0
248                        print(T, 'entries, starting to read in line', start_read)
249                        file.close()
250                    else:
251                        file.close()
252                        raise Exception('Correlator with pattern\n' + pattern + '\nnot found.')
253
254                # we found where the correlator
255                # that is to be read is in the files
256                # after preparing the datastructure
257                # the correlators get parsed into...
258                deltas = []
259                for j in range(T):
260                    deltas.append([])
261
262            for t in range(T):
263                deltas[t].append(np.zeros(no_cfg))
264            if compact:
265                for cfg in range(no_cfg):
266                    with open(path + '/' + item + '/' + sub_ls[cfg]) as fp:
267                        lines = fp.readlines()
268                        # check, if the correlator is in fact
269                        # printed completely
270                        if (start_read + T > len(lines)):
271                            raise Exception("EOF before end of correlator data! Maybe " + path + '/' + item + '/' + sub_ls[cfg] + " is corrupted?")
272                        # and start to read the correlator.
273                        # the range here is chosen like this,
274                        # since this allows for implementing
275                        # a security check for every read correlator later...
276                        for k in range(start_read - 6, start_read + T):
277                            if k == start_read - 5 - b2b:
278                                if lines[k].strip() != 'name      ' + name:
279                                    raise Exception('Wrong format', sub_ls[cfg])
280                            if (k >= start_read and k < start_read + T):
281                                floats = list(map(float, lines[k].split()))
282                                deltas[k - start_read][i][cfg] = floats[-2:][im]
283            else:
284                for cnfg, subitem in enumerate(sub_ls):
285                    with open(path + '/' + item + '/' + subitem + '/' + name) as fp:
286                        # since the non-compatified files
287                        # are typically not so long,
288                        # we can iterate over the whole file.
289                        # here one can also implement the chekc from above.
290                        for k, line in enumerate(fp):
291                            if (k >= start_read and k < start_read + T):
292                                floats = list(map(float, line.split()))
293                                if version == "0.0":
294                                    deltas[k - start][i][cnfg] = floats[im - single]
295                                else:
296                                    deltas[k - start_read][i][cnfg] = floats[1 + im - single]
297
298    else:
299        if "files" in kwargs:
300            ls = kwargs.get("files")
301        else:
302            for exc in ls:
303                if not fnmatch.fnmatch(exc, prefix + '*.' + name):
304                    ls = list(set(ls) - set([exc]))
305                ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
306        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
307        if b2b:
308            pattern += '\nwf_2      ' + str(wf2)
309        for rep, file in enumerate(ls):
310            rep_idl = []
311            with open(path + '/' + file, 'r') as fp:
312                content = fp.readlines()
313                data_starts = []
314                for linenumber, line in enumerate(content):
315                    if "[run]" in line:
316                        data_starts.append(linenumber)
317                if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
318                    raise Exception("Irregularities in file structure found, not all runs have the same output length")
319                chunk = content[:data_starts[1]]
320                for linenumber, line in enumerate(chunk):
321                    if line.startswith("gauge_name"):
322                        gauge_line = linenumber
323                    elif line.startswith("[correlator]"):
324                        corr_line = linenumber
325                        found_pat = ""
326                        for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
327                            found_pat += li
328                        if re.search(pattern, found_pat):
329                            start_read = corr_line + 7 + b2b
330                            break
331                endline = corr_line + 6 + b2b
332                while not chunk[endline] == "\n":
333                    endline += 1
334                T = endline - start_read
335                if rep == 0:
336                    deltas = []
337                    for t in range(T):
338                        deltas.append([])
339                for t in range(T):
340                    deltas[t].append(np.zeros(len(data_starts)))
341                # all other chunks should follow the same structure
342                for cnfg in range(len(data_starts)):
343                    start = data_starts[cnfg]
344                    stop = start + data_starts[1]
345                    chunk = content[start:stop]
346                    try:
347                        rep_idl.append(int(chunk[gauge_line].split(cfg_separator)[-1]))
348                    except Exception:
349                        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
350
351                    found_pat = ""
352                    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
353                        found_pat += li
354                    if re.search(pattern, found_pat):
355                        for t, line in enumerate(chunk[start_read:start_read + T]):
356                            floats = list(map(float, line.split()))
357                            deltas[t][rep][cnfg] = floats[im + 1 - single]
358            idl.append(rep_idl)
359
360    if "check_configs" in kwargs:
361        print("Checking for missing configs...")
362        che = kwargs.get("check_configs")
363        if not (len(che) == len(idl)):
364            raise Exception("check_configs has to be the same length as replica!")
365        for r in range(len(idl)):
366            print("checking " + new_names[r])
367            utils.check_idl(idl[r], che[r])
368        print("Done")
369    result = []
370    for t in range(T):
371        result.append(Obs(deltas[t], new_names, idl=idl))
372    return result

Read sfcf c format from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is handled automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): List of replicas to be read; default is all.
  • files (list): List of files to be read per replica; default is all. For the non-compact output format, pass the folders to be read here.
  • check_configs (list[list[int]]): List of lists of expected configs, e.g. [range(1, 1001)] for one replicum with 1000 configs.
Returns
  • result (list[Obs]): List of observables of length T, one observable per timeslice. bb-type correlators have length 1.