pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from .utils import sort_names, check_idl
  7import itertools
  8
  9
# Separator used to join and split the components of correlator keys,
# which have the form name/quarks/offset/wf/wf2.
sep = "/"
 11
 12
 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
 14    """Read sfcf files from given folder structure.
 15
 16    Parameters
 17    ----------
 18    path : str
 19        Path to the sfcf files.
 20    prefix : str
 21        Prefix of the sfcf files.
 22    name : str
 23        Name of the correlation function to read.
 24    quarks : str
 25        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 26        for version 0.0 this does NOT need to be given with the typical " - "
 27        that is present in the output file,
 28        this is done automatically for this version
 29    corr_type : str
 30        Type of correlation function to read. Can be
 31        - 'bi' for boundary-inner
 32        - 'bb' for boundary-boundary
 33        - 'bib' for boundary-inner-boundary
 34    noffset : int
 35        Offset of the source (only relevant when wavefunctions are used)
 36    wf : int
 37        ID of wave function
 38    wf2 : int
 39        ID of the second wavefunction
 40        (only relevant for boundary-to-boundary correlation functions)
 41    im : bool
 42        if True, read imaginary instead of real part
 43        of the correlation function.
 44    names : list
 45        Alternative labeling for replicas/ensembles.
 46        Has to have the appropriate length
 47    ens_name : str
 48        replaces the name of the ensemble
 49    version: str
 50        version of SFCF, with which the measurement was done.
 51        if the compact output option (-c) was specified,
 52        append a "c" to the version (e.g. "1.0c")
 53        if the append output option (-a) was specified,
 54        append an "a" to the version
 55    cfg_separator : str
 56        String that separates the ensemble identifier from the configuration number (default 'n').
 57    replica: list
 58        list of replica to be read, default is all
 59    files: list
 60        list of files to be read per replica, default is all.
 61        for non-compact output format, hand the folders to be read here.
 62    check_configs: list[list[int]]
 63        list of list of supposed configs, eg. [range(1,1000)]
 64        for one replicum with 1000 configs
 65
 66    Returns
 67    -------
 68    result: list[Obs]
 69        list of Observables with length T, observable per timeslice.
 70        bb-type correlators have length 1.
 71    """
 72    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
 73                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
 74                          cfg_separator=cfg_separator, silent=silent, **kwargs)
 75    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
 76
 77
 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 79    """Read sfcf files from given folder structure.
 80
 81    Parameters
 82    ----------
 83    path : str
 84        Path to the sfcf files.
 85    prefix : str
 86        Prefix of the sfcf files.
 87    name : str
 88        Name of the correlation function to read.
 89    quarks_list : list[str]
 90        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 91        for version 0.0 this does NOT need to be given with the typical " - "
 92        that is present in the output file,
 93        this is done automatically for this version
 94    corr_type_list : list[str]
 95        Type of correlation function to read. Can be
 96        - 'bi' for boundary-inner
 97        - 'bb' for boundary-boundary
 98        - 'bib' for boundary-inner-boundary
 99    noffset_list : list[int]
100        Offset of the source (only relevant when wavefunctions are used)
101    wf_list : int
102        ID of wave function
103    wf2_list : list[int]
104        ID of the second wavefunction
105        (only relevant for boundary-to-boundary correlation functions)
106    im : bool
107        if True, read imaginary instead of real part
108        of the correlation function.
109    names : list
110        Alternative labeling for replicas/ensembles.
111        Has to have the appropriate length
112    ens_name : str
113        replaces the name of the ensemble
114    version: str
115        version of SFCF, with which the measurement was done.
116        if the compact output option (-c) was specified,
117        append a "c" to the version (e.g. "1.0c")
118        if the append output option (-a) was specified,
119        append an "a" to the version
120    cfg_separator : str
121        String that separates the ensemble identifier from the configuration number (default 'n').
122    replica: list
123        list of replica to be read, default is all
124    files: list[list[int]]
125        list of files to be read per replica, default is all.
126        for non-compact output format, hand the folders to be read here.
127    check_configs: list[list[int]]
128        list of list of supposed configs, eg. [range(1,1000)]
129        for one replicum with 1000 configs
130    rep_string: str
131        Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string.
132    Returns
133    -------
134    result: dict[list[Obs]]
135        dict with one of the following properties:
136        if keyed_out:
137            dict[key] = list[Obs]
138            where key has the form name/quarks/offset/wf/wf2
139        if not keyed_out:
140            dict[name][quarks][offset][wf][wf2] = list[Obs]
141    """
142
143    if kwargs.get('im'):
144        im = 1
145        part = 'imaginary'
146    else:
147        im = 0
148        part = 'real'
149
150    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
151
152    if version not in known_versions:
153        raise Exception("This version is not known!")
154    if (version[-1] == "c"):
155        appended = False
156        compact = True
157        version = version[:-1]
158    elif (version[-1] == "a"):
159        appended = True
160        compact = False
161        version = version[:-1]
162    else:
163        compact = False
164        appended = False
165    ls = []
166    if "replica" in kwargs:
167        ls = kwargs.get("replica")
168    else:
169        for (dirpath, dirnames, filenames) in os.walk(path):
170            if not appended:
171                ls.extend(dirnames)
172            else:
173                ls.extend(filenames)
174            break
175        if not ls:
176            raise Exception('Error, directory not found')
177        # Exclude folders with different names
178        for exc in ls:
179            if not fnmatch.fnmatch(exc, prefix + '*'):
180                ls = list(set(ls) - set([exc]))
181
182    if not appended:
183        ls = sort_names(ls)
184        replica = len(ls)
185
186    else:
187        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
188    if replica == 0:
189        raise Exception('No replica found in directory')
190    if not silent:
191        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
192
193    if 'names' in kwargs:
194        new_names = kwargs.get('names')
195        if len(new_names) != len(set(new_names)):
196            raise Exception("names are not unique!")
197        if len(new_names) != replica:
198            raise Exception('names should have the length', replica)
199
200    else:
201        ens_name = kwargs.get("ens_name")
202        if not appended:
203            new_names = _get_rep_names(ls, ens_name, rep_sep=(kwargs.get('rep_string', 'r')))
204        else:
205            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name, rep_sep=(kwargs.get('rep_string', 'r')))
206        new_names = sort_names(new_names)
207
208    idl = []
209
210    noffset_list = [str(x) for x in noffset_list]
211    wf_list = [str(x) for x in wf_list]
212    wf2_list = [str(x) for x in wf2_list]
213
214    # setup dict structures
215    intern = {}
216    for name, corr_type in zip(name_list, corr_type_list):
217        intern[name] = {}
218        b2b, single = _extract_corr_type(corr_type)
219        intern[name]["b2b"] = b2b
220        intern[name]["single"] = single
221        intern[name]["spec"] = {}
222        for quarks in quarks_list:
223            intern[name]["spec"][quarks] = {}
224            for off in noffset_list:
225                intern[name]["spec"][quarks][off] = {}
226                for w in wf_list:
227                    intern[name]["spec"][quarks][off][w] = {}
228                    if b2b:
229                        for w2 in wf2_list:
230                            intern[name]["spec"][quarks][off][w][w2] = {}
231                            intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
232                    else:
233                        intern[name]["spec"][quarks][off][w]["0"] = {}
234                        intern[name]["spec"][quarks][off][w]["0"]["pattern"] = _make_pattern(version, name, off, w, 0, intern[name]['b2b'], quarks)
235
236    internal_ret_dict = {}
237    needed_keys = []
238    for name, corr_type in zip(name_list, corr_type_list):
239        b2b, single = _extract_corr_type(corr_type)
240        if b2b:
241            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, wf2_list))
242        else:
243            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, ["0"]))
244
245    for key in needed_keys:
246        internal_ret_dict[key] = []
247
248    if not appended:
249        for i, item in enumerate(ls):
250            rep_path = path + '/' + item
251            if "files" in kwargs:
252                files = kwargs.get("files")
253                if isinstance(files, list):
254                    if all(isinstance(f, list) for f in files):
255                        files = files[i]
256                    elif all(isinstance(f, str) for f in files):
257                        files = files
258                    else:
259                        raise TypeError("files has to be of type list[list[str]] or list[str]!")
260                else:
261                    raise TypeError("files has to be of type list[list[str]] or list[str]!")
262
263            else:
264                files = []
265            sub_ls = _find_files(rep_path, prefix, compact, files)
266            rep_idl = []
267            no_cfg = len(sub_ls)
268            for cfg in sub_ls:
269                try:
270                    if compact:
271                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
272                    else:
273                        rep_idl.append(int(cfg[3:]))
274                except Exception:
275                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
276            rep_idl.sort()
277            # maybe there is a better way to print the idls
278            if not silent:
279                print(item, ':', no_cfg, ' configurations')
280            idl.append(rep_idl)
281            # here we have found all the files we need to look into.
282            if i == 0:
283                if version != "0.0" and compact:
284                    file = path + '/' + item + '/' + sub_ls[0]
285                for name_index, name in enumerate(name_list):
286                    if version == "0.0" or not compact:
287                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
288                    if corr_type_list[name_index] == 'bi':
289                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, ["0"])
290                    else:
291                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, wf2_list)
292                    for key in name_keys:
293                        specs = _key2specs(key)
294                        quarks = specs[0]
295                        off = specs[1]
296                        w = specs[2]
297                        w2 = specs[3]
298                        # here, we want to find the place within the file,
299                        # where the correlator we need is stored.
300                        # to do so, the pattern needed is put together
301                        # from the input values
302                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
303                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
304                        intern[name]["T"] = T
305                        # preparing the datastructure
306                        # the correlators get parsed into...
307                        deltas = []
308                        for j in range(intern[name]["T"]):
309                            deltas.append([])
310                        internal_ret_dict[sep.join([name, key])] = deltas
311
312            if compact:
313                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
314                for key in needed_keys:
315                    name = _key2specs(key)[0]
316                    for t in range(intern[name]["T"]):
317                        internal_ret_dict[key][t].append(rep_deltas[key][t])
318            else:
319                for key in needed_keys:
320                    rep_data = []
321                    name = _key2specs(key)[0]
322                    for subitem in sub_ls:
323                        cfg_path = path + '/' + item + '/' + subitem
324                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
325                        rep_data.append(file_data)
326                    for t in range(intern[name]["T"]):
327                        internal_ret_dict[key][t].append([])
328                        for cfg in range(no_cfg):
329                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
330    else:
331        for key in needed_keys:
332            specs = _key2specs(key)
333            name = specs[0]
334            quarks = specs[1]
335            off = specs[2]
336            w = specs[3]
337            w2 = specs[4]
338            if "files" in kwargs:
339                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
340                    name_ls = kwargs.get("files")
341                else:
342                    raise TypeError("In append mode, files has to be of type list[str]!")
343            else:
344                name_ls = ls
345                for exc in name_ls:
346                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
347                        name_ls = list(set(name_ls) - set([exc]))
348            name_ls = sort_names(name_ls)
349            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
350            deltas = []
351            for rep, file in enumerate(name_ls):
352                rep_idl = []
353                filename = path + '/' + file
354                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
355                if rep == 0:
356                    intern[name]['T'] = T
357                    for t in range(intern[name]['T']):
358                        deltas.append([])
359                for t in range(intern[name]['T']):
360                    deltas[t].append(rep_data[t])
361                internal_ret_dict[key] = deltas
362                if name == name_list[0]:
363                    idl.append(rep_idl)
364
365    if kwargs.get("check_configs") is True:
366        if not silent:
367            print("Checking for missing configs...")
368        che = kwargs.get("check_configs")
369        if not (len(che) == len(idl)):
370            raise Exception("check_configs has to be the same length as replica!")
371        for r in range(len(idl)):
372            if not silent:
373                print("checking " + new_names[r])
374            check_idl(idl[r], che[r])
375        if not silent:
376            print("Done")
377
378    result_dict = {}
379    if keyed_out:
380        for key in needed_keys:
381            name = _key2specs(key)[0]
382            result = []
383            for t in range(intern[name]["T"]):
384                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
385            result_dict[key] = result
386    else:
387        for name, corr_type in zip(name_list, corr_type_list):
388            result_dict[name] = {}
389            for quarks in quarks_list:
390                result_dict[name][quarks] = {}
391                for off in noffset_list:
392                    result_dict[name][quarks][off] = {}
393                    for w in wf_list:
394                        result_dict[name][quarks][off][w] = {}
395                        if corr_type != 'bi':
396                            for w2 in wf2_list:
397                                key = _specs2key(name, quarks, off, w, w2)
398                                result = []
399                                for t in range(intern[name]["T"]):
400                                    result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
401                                result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
402                        else:
403                            key = _specs2key(name, quarks, off, w, "0")
404                            result = []
405                            for t in range(intern[name]["T"]):
406                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
407                            result_dict[name][quarks][str(off)][str(w)][str(0)] = result
408    return result_dict
409
410
def _lists2key(*lists):
    """Return one key per element of the cartesian product of `lists`.

    The entries of each combination are joined with the module-level `sep`.
    """
    return [sep.join(combination) for combination in itertools.product(*lists)]
416
417
def _key2specs(key):
    """Split a key at the module-level `sep` and return the list of parts."""
    specs = key.split(sep)
    return specs
420
421
def _specs2key(*specs):
    """Join the given string specs with the module-level `sep` into one key."""
    key = sep.join(specs)
    return key
424
425
def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
    """Read all requested correlators called `name` from one configuration folder.

    Parameters
    ----------
    cfg_path : str
        Path of the configuration folder; the file read is `cfg_path/name`.
    name : str
        Name of the correlation function, which is also the file name.
    needed_keys : list[str]
        Keys of the form name/quarks/offset/wf/wf2. Keys whose first part
        differs from `name` are ignored.
    intern : dict
        Bookkeeping dict; `intern[name]["T"]`, `intern[name]["single"]` and
        `intern[name]["spec"][quarks][off][w][w2]["start"]` are read.
    version : str
        Version string; "0.0" files are read with a column index that is one
        smaller than for the other versions.
    im : int
        0 to read the real part, 1 to read the imaginary part.

    Returns
    -------
    return_vals : dict[str, list[float]]
        For every matching key the T values starting at its start line.
    """
    file = cfg_path + '/' + name
    # The file is read lazily and only once, no matter how many keys match.
    # If no key matches, it is never opened.
    lines = None
    return_vals = {}
    for key in needed_keys:
        specs = _key2specs(key)
        if specs[0] != name:
            continue
        if lines is None:
            with open(file) as fp:
                lines = fp.readlines()
        quarks, off, w, w2 = specs[1], specs[2], specs[3], specs[4]
        T = intern[name]["T"]
        start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
        # Column holding the requested part of the correlator.
        if version == "0.0":
            column = im - intern[name]["single"]
        else:
            column = 1 + im - intern[name]["single"]
        deltas = []
        for line in lines[start_read:start_read + T]:
            floats = list(map(float, line.split()))
            deltas.append(floats[column])
        return_vals[key] = deltas
    return return_vals
449
450
451def _extract_corr_type(corr_type):
452    if corr_type == 'bb':
453        b2b = True
454        single = True
455    elif corr_type == 'bib':
456        b2b = True
457        single = False
458    else:
459        b2b = False
460        single = False
461    return b2b, single
462
463
464def _find_files(rep_path, prefix, compact, files=[]):
465    sub_ls = []
466    if not files == []:
467        files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
468    else:
469        for (dirpath, dirnames, filenames) in os.walk(rep_path):
470            if compact:
471                sub_ls.extend(filenames)
472            else:
473                sub_ls.extend(dirnames)
474            break
475        if compact:
476            for exc in sub_ls:
477                if not fnmatch.fnmatch(exc, prefix + '*'):
478                    sub_ls = list(set(sub_ls) - set([exc]))
479            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
480        else:
481            for exc in sub_ls:
482                if not fnmatch.fnmatch(exc, 'cfg*'):
483                    sub_ls = list(set(sub_ls) - set([exc]))
484            sub_ls.sort(key=lambda x: int(x[3:]))
485        files = sub_ls
486    if len(files) == 0:
487        raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.")
488    return files
489
490
491def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
492    if version == "0.0":
493        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
494        if b2b:
495            pattern += ", wf_2 " + str(wf2)
496        qs = quarks.split(" ")
497        pattern += " : " + qs[0] + " - " + qs[1]
498    else:
499        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
500        if b2b:
501            pattern += '\nwf_2      ' + str(wf2)
502    return pattern
503
504
505def _find_correlator(file_name, version, pattern, b2b, silent=False):
506    T = 0
507
508    with open(file_name, "r") as my_file:
509
510        content = my_file.read()
511        match = re.search(pattern, content)
512        if match:
513            if version == "0.0":
514                start_read = content.count('\n', 0, match.start()) + 1
515                T = content.count('\n', start_read)
516            else:
517                start_read = content.count('\n', 0, match.start()) + 5 + b2b
518                end_match = re.search(r'\n\s*\n', content[match.start():])
519                T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
520            if not T > 0:
521                raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
522            if not silent:
523                print(T, 'entries, starting to read in line', start_read)
524
525        else:
526            raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')
527
528    return start_read, T
529
530
def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
    """Read all requested correlators from one compact-format file.

    The file `rep_path + cfg_file` is read once. For every key (of the form
    name/quarks/offset/wf/wf2) the T data lines starting at the stored start
    line are parsed, and the `im`-th of the last two columns is kept.

    Returns a dict mapping every key to its list of T values. Raises an
    Exception if the file is too short or the header line in front of the
    data does not carry the expected correlator name.
    """
    with open(rep_path + cfg_file) as fp:
        lines = fp.readlines()
    n_lines = len(lines)

    return_vals = {}
    for key in needed_keys:
        specs = _key2specs(key)
        name, quarks, off, w, w2 = specs[0], specs[1], specs[2], specs[3], specs[4]
        corr_info = intern[name]

        T = corr_info["T"]
        start_read = corr_info["spec"][quarks][off][w][w2]["start"]
        # check, if the correlator is in fact
        # printed completely
        if n_lines < start_read + T + 1:
            raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
        # six header lines in front of the data are kept for the name check
        corr_lines = lines[start_read - 6: start_read + T]

        name_line = corr_lines[1 - corr_info["b2b"]].strip()
        if name_line != 'name      ' + name:
            raise Exception('Wrong format in file', cfg_file)

        return_vals[key] = [
            list(map(float, corr_lines[k].split()))[-2:][im]
            for k in range(6, T + 6)
        ]
    return return_vals
560
561
def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
    """Read all requested correlators for every configuration of one replicum.

    Returns a dict that maps every key to a list with one numpy array per
    timeslice; each array holds one entry per file in `sub_ls`, in the order
    of `sub_ls`.
    """
    rep_path = path + '/' + rep + '/'
    no_cfg = len(sub_ls)

    # temporal extent of every requested correlator
    extent = {key: intern[_key2specs(key)[0]]["T"] for key in needed_keys}
    return_vals = {
        key: [np.zeros(no_cfg) for _ in range(extent[key])]
        for key in needed_keys
    }

    for cfg, cfg_file in enumerate(sub_ls):
        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
        for key in needed_keys:
            for t in range(extent[key]):
                return_vals[key][t][cfg] = cfg_data[key][t]
    return return_vals
582
583
584def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
585    try:
586        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
587    except Exception:
588        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
589
590    found_pat = ""
591    data = []
592    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
593        found_pat += li
594    if re.search(pattern, found_pat):
595        for t, line in enumerate(chunk[start_read:start_read + T]):
596            floats = list(map(float, line.split()))
597            data.append(floats[im + 1 - single])
598    return idl, data
599
600
def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
    """Read one correlator for all configurations from an appended-format file.

    The file consists of equally long chunks, each starting with a line that
    contains "[run]". The layout (line of the gauge name, position and length
    of the correlator) is determined from the first chunk and reused for all
    others.

    Parameters
    ----------
    filename : str
        File to read.
    pattern : str
        Regular expression matching the header of the correlator.
    b2b : bool
        Whether the header contains an additional line for the second
        wave function.
    cfg_separator : str
        String in front of the configuration number in the gauge name line.
    im : int
        0 to read the real part, 1 to read the imaginary part.
    single : bool
        Shifts the column that is read by one to the left, see `_read_chunk`.

    Returns
    -------
    T : int
        Number of data lines of the correlator.
    rep_idl : list[int]
        Configuration number of every chunk.
    data : list[list[float]]
        data[t][cfg] is the value at timeslice t in chunk cfg.

    Raises
    ------
    ValueError
        If the file has no "[run]" line, no "gauge_name" line, or the pattern
        is not found in the first "[correlator]" section.
    Exception
        If the chunks do not all have the same length.
    """
    with open(filename, 'r') as fp:
        content = fp.readlines()

    data_starts = [linenumber for linenumber, line in enumerate(content) if "[run]" in line]
    if not data_starts:
        raise ValueError("No '[run]' section found in " + filename)
    if len(set(b - a for a, b in zip(data_starts, data_starts[1:]))) > 1:
        raise Exception("Irregularities in file structure found, not all runs have the same output length")
    # A file holding a single run has no second "[run]" line; the chunk is
    # then the whole file.
    chunk_len = data_starts[1] if len(data_starts) > 1 else len(content)
    chunk = content[:chunk_len]

    gauge_line = None
    corr_line = None
    start_read = None
    for linenumber, line in enumerate(chunk):
        if line.startswith("gauge_name"):
            gauge_line = linenumber
        elif line.startswith("[correlator]"):
            corr_line = linenumber
            found_pat = "".join(chunk[corr_line + 1: corr_line + 6 + b2b])
            # NOTE(review): only the first "[correlator]" section is
            # inspected; a non-matching first section raises - confirm that
            # appended files hold exactly one correlator per run.
            if re.search(pattern, found_pat):
                start_read = corr_line + 7 + b2b
                break
            else:
                raise ValueError("Did not find pattern\n" + str(pattern) + "\nin\n" + str(filename))
    if gauge_line is None or start_read is None:
        raise ValueError("Did not find a 'gauge_name' line followed by a '[correlator]' section in " + filename)

    # The correlator ends at the next blank line, or at the end of the chunk
    # if no blank line follows.
    endline = corr_line + 6 + b2b
    while endline < len(chunk) and chunk[endline] != "\n":
        endline += 1
    T = endline - start_read

    # all other chunks should follow the same structure
    rep_idl = []
    rep_data = []

    for start in data_starts:
        chunk = content[start:start + chunk_len]
        idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
        rep_idl.append(idl)
        rep_data.append(data)

    # transpose from [cfg][t] to [t][cfg]
    data = [[rep_data[c][t] for c in range(len(rep_data))] for t in range(T)]
    return T, rep_idl, data
648
649
650def _get_rep_names(ls, ens_name=None, rep_sep='r'):
651    new_names = []
652    for entry in ls:
653        try:
654            idx = entry.index(rep_sep)
655        except Exception:
656            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
657
658        if ens_name:
659            new_names.append(ens_name + '|' + entry[idx:])
660        else:
661            new_names.append(entry[:idx] + '|' + entry[idx:])
662    return new_names
663
664
665def _get_appended_rep_names(ls, prefix, name, ens_name=None, rep_sep='r'):
666    new_names = []
667    for exc in ls:
668        if not fnmatch.fnmatch(exc, prefix + '*.' + name):
669            ls = list(set(ls) - set([exc]))
670    ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
671    for entry in ls:
672        myentry = entry[:-len(name) - 1]
673        try:
674            idx = myentry.index(rep_sep)
675        except Exception:
676            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
677
678        if ens_name:
679            new_names.append(ens_name + '|' + entry[idx:])
680        else:
681            new_names.append(myentry[:idx] + '|' + myentry[idx:])
682    return new_names
# Separator used to join and split the components of correlator keys,
# which have the form name/quarks/offset/wf/wf2.
sep = '/'
def read_sfcf( path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):
def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
    """Read a single sfcf correlation function from a given folder structure.

    Thin convenience wrapper around `read_sfcf_multi` that requests exactly
    one combination of name, quarks, offset and wave functions and returns
    the corresponding list of observables.

    Parameters
    ----------
    path : str
        Path to the sfcf files.
    prefix : str
        Prefix of the sfcf files.
    name : str
        Name of the correlation function to read.
    quarks : str
        Label of the quarks used in the sfcf input file, e.g. "quark quark".
        For version 0.0 this does NOT need to be given with the typical " - "
        that is present in the output file,
        this is done automatically for this version.
    corr_type : str
        Type of correlation function to read. Can be
        - 'bi' for boundary-inner
        - 'bb' for boundary-boundary
        - 'bib' for boundary-inner-boundary
    noffset : int
        Offset of the source (only relevant when wavefunctions are used)
    wf : int
        ID of wave function
    wf2 : int
        ID of the second wavefunction
        (only relevant for boundary-to-boundary correlation functions)
    version: str
        Version of SFCF with which the measurement was done.
        If the compact output option (-c) was specified,
        append a "c" to the version (e.g. "1.0c").
        If the append output option (-a) was specified,
        append an "a" to the version.
    cfg_separator : str
        String that separates the ensemble identifier from the configuration number (default 'n').
    silent : bool
        Passed on to `read_sfcf_multi`; if True, progress output is suppressed.
    im : bool
        if True, read imaginary instead of real part
        of the correlation function.
    names : list
        Alternative labeling for replicas/ensembles.
        Has to have the appropriate length
    ens_name : str
        replaces the name of the ensemble
    replica: list
        list of replica to be read, default is all
    files: list
        list of files to be read per replica, default is all.
        for non-compact output format, hand the folders to be read here.
    check_configs: list[list[int]]
        list of list of supposed configs, e.g. [range(1,1000)]
        for one replicum with 1000 configs

    Returns
    -------
    result: list[Obs]
        list of Observables with length T, observable per timeslice.
        bb-type correlators have length 1.
    """
    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
                          cfg_separator=cfg_separator, silent=silent, **kwargs)
    per_wf = ret[name][quarks][str(noffset)][str(wf)]
    # Correlators that are not boundary-to-boundary are stored under the
    # second-wavefunction key "0" regardless of wf2, so fall back to that key
    # instead of failing with a KeyError when wf2 != 0 was passed.
    wf2_key = str(wf2) if str(wf2) in per_wf else "0"
    return per_wf[wf2_key]

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is done automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of lists of the expected configs, e.g. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (list[Obs]): list of Observables with length T, observable per timeslice. bb-type correlators have length 1.
def read_sfcf_multi( path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):
 79def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 80    """Read sfcf files from given folder structure.
 81
 82    Parameters
 83    ----------
 84    path : str
 85        Path to the sfcf files.
 86    prefix : str
 87        Prefix of the sfcf files.
 88    name : str
 89        Name of the correlation function to read.
 90    quarks_list : list[str]
 91        Label of the quarks used in the sfcf input file. e.g. "quark quark"
 92        for version 0.0 this does NOT need to be given with the typical " - "
 93        that is present in the output file,
 94        this is done automatically for this version
 95    corr_type_list : list[str]
 96        Type of correlation function to read. Can be
 97        - 'bi' for boundary-inner
 98        - 'bb' for boundary-boundary
 99        - 'bib' for boundary-inner-boundary
100    noffset_list : list[int]
101        Offset of the source (only relevant when wavefunctions are used)
102    wf_list : int
103        ID of wave function
104    wf2_list : list[int]
105        ID of the second wavefunction
106        (only relevant for boundary-to-boundary correlation functions)
107    im : bool
108        if True, read imaginary instead of real part
109        of the correlation function.
110    names : list
111        Alternative labeling for replicas/ensembles.
112        Has to have the appropriate length
113    ens_name : str
114        replaces the name of the ensemble
115    version: str
116        version of SFCF, with which the measurement was done.
117        if the compact output option (-c) was specified,
118        append a "c" to the version (e.g. "1.0c")
119        if the append output option (-a) was specified,
120        append an "a" to the version
121    cfg_separator : str
122        String that separates the ensemble identifier from the configuration number (default 'n').
123    replica: list
124        list of replica to be read, default is all
125    files: list[list[int]]
126        list of files to be read per replica, default is all.
127        for non-compact output format, hand the folders to be read here.
128    check_configs: list[list[int]]
129        list of list of supposed configs, eg. [range(1,1000)]
130        for one replicum with 1000 configs
131    rep_string: str
132        Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string.
133    Returns
134    -------
135    result: dict[list[Obs]]
136        dict with one of the following properties:
137        if keyed_out:
138            dict[key] = list[Obs]
139            where key has the form name/quarks/offset/wf/wf2
140        if not keyed_out:
141            dict[name][quarks][offset][wf][wf2] = list[Obs]
142    """
143
144    if kwargs.get('im'):
145        im = 1
146        part = 'imaginary'
147    else:
148        im = 0
149        part = 'real'
150
151    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
152
153    if version not in known_versions:
154        raise Exception("This version is not known!")
155    if (version[-1] == "c"):
156        appended = False
157        compact = True
158        version = version[:-1]
159    elif (version[-1] == "a"):
160        appended = True
161        compact = False
162        version = version[:-1]
163    else:
164        compact = False
165        appended = False
166    ls = []
167    if "replica" in kwargs:
168        ls = kwargs.get("replica")
169    else:
170        for (dirpath, dirnames, filenames) in os.walk(path):
171            if not appended:
172                ls.extend(dirnames)
173            else:
174                ls.extend(filenames)
175            break
176        if not ls:
177            raise Exception('Error, directory not found')
178        # Exclude folders with different names
179        for exc in ls:
180            if not fnmatch.fnmatch(exc, prefix + '*'):
181                ls = list(set(ls) - set([exc]))
182
183    if not appended:
184        ls = sort_names(ls)
185        replica = len(ls)
186
187    else:
188        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
189    if replica == 0:
190        raise Exception('No replica found in directory')
191    if not silent:
192        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
193
194    if 'names' in kwargs:
195        new_names = kwargs.get('names')
196        if len(new_names) != len(set(new_names)):
197            raise Exception("names are not unique!")
198        if len(new_names) != replica:
199            raise Exception('names should have the length', replica)
200
201    else:
202        ens_name = kwargs.get("ens_name")
203        if not appended:
204            new_names = _get_rep_names(ls, ens_name, rep_sep=(kwargs.get('rep_string', 'r')))
205        else:
206            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name, rep_sep=(kwargs.get('rep_string', 'r')))
207        new_names = sort_names(new_names)
208
209    idl = []
210
211    noffset_list = [str(x) for x in noffset_list]
212    wf_list = [str(x) for x in wf_list]
213    wf2_list = [str(x) for x in wf2_list]
214
215    # setup dict structures
216    intern = {}
217    for name, corr_type in zip(name_list, corr_type_list):
218        intern[name] = {}
219        b2b, single = _extract_corr_type(corr_type)
220        intern[name]["b2b"] = b2b
221        intern[name]["single"] = single
222        intern[name]["spec"] = {}
223        for quarks in quarks_list:
224            intern[name]["spec"][quarks] = {}
225            for off in noffset_list:
226                intern[name]["spec"][quarks][off] = {}
227                for w in wf_list:
228                    intern[name]["spec"][quarks][off][w] = {}
229                    if b2b:
230                        for w2 in wf2_list:
231                            intern[name]["spec"][quarks][off][w][w2] = {}
232                            intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
233                    else:
234                        intern[name]["spec"][quarks][off][w]["0"] = {}
235                        intern[name]["spec"][quarks][off][w]["0"]["pattern"] = _make_pattern(version, name, off, w, 0, intern[name]['b2b'], quarks)
236
237    internal_ret_dict = {}
238    needed_keys = []
239    for name, corr_type in zip(name_list, corr_type_list):
240        b2b, single = _extract_corr_type(corr_type)
241        if b2b:
242            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, wf2_list))
243        else:
244            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, ["0"]))
245
246    for key in needed_keys:
247        internal_ret_dict[key] = []
248
249    if not appended:
250        for i, item in enumerate(ls):
251            rep_path = path + '/' + item
252            if "files" in kwargs:
253                files = kwargs.get("files")
254                if isinstance(files, list):
255                    if all(isinstance(f, list) for f in files):
256                        files = files[i]
257                    elif all(isinstance(f, str) for f in files):
258                        files = files
259                    else:
260                        raise TypeError("files has to be of type list[list[str]] or list[str]!")
261                else:
262                    raise TypeError("files has to be of type list[list[str]] or list[str]!")
263
264            else:
265                files = []
266            sub_ls = _find_files(rep_path, prefix, compact, files)
267            rep_idl = []
268            no_cfg = len(sub_ls)
269            for cfg in sub_ls:
270                try:
271                    if compact:
272                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
273                    else:
274                        rep_idl.append(int(cfg[3:]))
275                except Exception:
276                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
277            rep_idl.sort()
278            # maybe there is a better way to print the idls
279            if not silent:
280                print(item, ':', no_cfg, ' configurations')
281            idl.append(rep_idl)
282            # here we have found all the files we need to look into.
283            if i == 0:
284                if version != "0.0" and compact:
285                    file = path + '/' + item + '/' + sub_ls[0]
286                for name_index, name in enumerate(name_list):
287                    if version == "0.0" or not compact:
288                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
289                    if corr_type_list[name_index] == 'bi':
290                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, ["0"])
291                    else:
292                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, wf2_list)
293                    for key in name_keys:
294                        specs = _key2specs(key)
295                        quarks = specs[0]
296                        off = specs[1]
297                        w = specs[2]
298                        w2 = specs[3]
299                        # here, we want to find the place within the file,
300                        # where the correlator we need is stored.
301                        # to do so, the pattern needed is put together
302                        # from the input values
303                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
304                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
305                        intern[name]["T"] = T
306                        # preparing the datastructure
307                        # the correlators get parsed into...
308                        deltas = []
309                        for j in range(intern[name]["T"]):
310                            deltas.append([])
311                        internal_ret_dict[sep.join([name, key])] = deltas
312
313            if compact:
314                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
315                for key in needed_keys:
316                    name = _key2specs(key)[0]
317                    for t in range(intern[name]["T"]):
318                        internal_ret_dict[key][t].append(rep_deltas[key][t])
319            else:
320                for key in needed_keys:
321                    rep_data = []
322                    name = _key2specs(key)[0]
323                    for subitem in sub_ls:
324                        cfg_path = path + '/' + item + '/' + subitem
325                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
326                        rep_data.append(file_data)
327                    for t in range(intern[name]["T"]):
328                        internal_ret_dict[key][t].append([])
329                        for cfg in range(no_cfg):
330                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
331    else:
332        for key in needed_keys:
333            specs = _key2specs(key)
334            name = specs[0]
335            quarks = specs[1]
336            off = specs[2]
337            w = specs[3]
338            w2 = specs[4]
339            if "files" in kwargs:
340                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
341                    name_ls = kwargs.get("files")
342                else:
343                    raise TypeError("In append mode, files has to be of type list[str]!")
344            else:
345                name_ls = ls
346                for exc in name_ls:
347                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
348                        name_ls = list(set(name_ls) - set([exc]))
349            name_ls = sort_names(name_ls)
350            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
351            deltas = []
352            for rep, file in enumerate(name_ls):
353                rep_idl = []
354                filename = path + '/' + file
355                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
356                if rep == 0:
357                    intern[name]['T'] = T
358                    for t in range(intern[name]['T']):
359                        deltas.append([])
360                for t in range(intern[name]['T']):
361                    deltas[t].append(rep_data[t])
362                internal_ret_dict[key] = deltas
363                if name == name_list[0]:
364                    idl.append(rep_idl)
365
366    if kwargs.get("check_configs") is True:
367        if not silent:
368            print("Checking for missing configs...")
369        che = kwargs.get("check_configs")
370        if not (len(che) == len(idl)):
371            raise Exception("check_configs has to be the same length as replica!")
372        for r in range(len(idl)):
373            if not silent:
374                print("checking " + new_names[r])
375            check_idl(idl[r], che[r])
376        if not silent:
377            print("Done")
378
379    result_dict = {}
380    if keyed_out:
381        for key in needed_keys:
382            name = _key2specs(key)[0]
383            result = []
384            for t in range(intern[name]["T"]):
385                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
386            result_dict[key] = result
387    else:
388        for name, corr_type in zip(name_list, corr_type_list):
389            result_dict[name] = {}
390            for quarks in quarks_list:
391                result_dict[name][quarks] = {}
392                for off in noffset_list:
393                    result_dict[name][quarks][off] = {}
394                    for w in wf_list:
395                        result_dict[name][quarks][off][w] = {}
396                        if corr_type != 'bi':
397                            for w2 in wf2_list:
398                                key = _specs2key(name, quarks, off, w, w2)
399                                result = []
400                                for t in range(intern[name]["T"]):
401                                    result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
402                                result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
403                        else:
404                            key = _specs2key(name, quarks, off, w, "0")
405                            result = []
406                            for t in range(intern[name]["T"]):
407                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
408                            result_dict[name][quarks][str(off)][str(w)][str(0)] = result
409    return result_dict

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name_list (list[str]): Names of the correlation functions to read.
  • quarks_list (list[str]): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is done automatically for this version.
  • corr_type_list (list[str]): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
  • wf_list (list[int]): ID of wave function
  • wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list[list[str]] or list[str]): list of files to be read per replica, default is all. For non-compact output format, hand the folders to be read here. In append mode, a plain list[str] is required.
  • check_configs (list[list[int]]): list of list of supposed configs, eg. [range(1,1000)] for one replicum with 1000 configs
  • rep_string (str): Separator of ensemble name and replicum. Example: In "ensAr0", "r" would be the separator string.
Returns
  • result (dict[list[Obs]]): dict with one of the following properties: if keyed_out: dict[key] = list[Obs] where key has the form name/quarks/offset/wf/wf2 if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]