pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from .utils import sort_names, check_idl
  7import itertools
  8
  9
 10sep = "/"
 11
 12
 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
 14    """Read sfcf files from given folder structure.
 15
 16    Parameters
 17    ----------
 18    path : str
 19        Path to the sfcf files.
 20    prefix : str
 21        Prefix of the sfcf files.
 22    name : str
 23        Name of the correlation function to read.
 24    quarks : str
 25        Label of the quarks used in the sfcf input file, e.g. "quark quark".
 26        For version 0.0 this does NOT need to be given with the typical " - "
 27        that is present in the output file;
 28        this is added automatically for this version.
 29    corr_type : str
 30        Type of correlation function to read. Can be
 31        - 'bi' for boundary-inner
 32        - 'bb' for boundary-boundary
 33        - 'bib' for boundary-inner-boundary
 34    noffset : int
 35        Offset of the source (only relevant when wavefunctions are used)
 36    wf : int
 37        ID of wave function
 38    wf2 : int
 39        ID of the second wavefunction
 40        (only relevant for boundary-to-boundary correlation functions)
 41    im : bool
 42        if True, read imaginary instead of real part
 43        of the correlation function.
 44    names : list
 45        Alternative labeling for replicas/ensembles.
 46        Has to have the appropriate length
 47    ens_name : str
 48        replaces the name of the ensemble
 49    version: str
 50        version of SFCF with which the measurement was done.
 51        If the compact output option (-c) was specified,
 52        append a "c" to the version (e.g. "1.0c");
 53        if the append output option (-a) was specified,
 54        append an "a" to the version.
 55    cfg_separator : str
 56        String that separates the ensemble identifier from the configuration number (default 'n').
 57    replica: list
 58        list of replica to be read, default is all
 59    files: list
 60        list of files to be read per replica, default is all.
 61        for non-compact output format, hand the folders to be read here.
 62    check_configs: list[list[int]]
 63        list of lists of supposed configs, e.g. [range(1,1000)]
 64        for one replicum with 1000 configs
 65
 66    Returns
 67    -------
 68    result: list[Obs]
 69        list of Observables with length T, one observable per timeslice.
 70        bb-type correlators have length 1.
 71    """
 72    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
 73                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
 74                          cfg_separator=cfg_separator, silent=silent, **kwargs)
 75    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
 76
 77
 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 79    """Read sfcf files from given folder structure.
 80
 81    Parameters
 82    ----------
 83    path : str
 84        Path to the sfcf files.
 85    prefix : str
 86        Prefix of the sfcf files.
 87    name_list : list[str]
 88        Names of the correlation functions to read.
 89    quarks_list : list[str]
 90        Label of the quarks used in the sfcf input file, e.g. "quark quark".
 91        For version 0.0 this does NOT need to be given with the typical " - "
 92        that is present in the output file;
 93        this is added automatically for this version.
 94    corr_type_list : list[str]
 95        Type of correlation function to read. Can be
 96        - 'bi' for boundary-inner
 97        - 'bb' for boundary-boundary
 98        - 'bib' for boundary-inner-boundary
 99    noffset_list : list[int]
100        Offset of the source (only relevant when wavefunctions are used)
101    wf_list : list[int]
102        IDs of the wave functions
103    wf2_list : list[int]
104        ID of the second wavefunction
105        (only relevant for boundary-to-boundary correlation functions)
106    im : bool
107        if True, read imaginary instead of real part
108        of the correlation function.
109    names : list
110        Alternative labeling for replicas/ensembles.
111        Has to have the appropriate length
112    ens_name : str
113        replaces the name of the ensemble
114    version: str
115        version of SFCF with which the measurement was done.
116        If the compact output option (-c) was specified,
117        append a "c" to the version (e.g. "1.0c");
118        if the append output option (-a) was specified,
119        append an "a" to the version.
120    cfg_separator : str
121        String that separates the ensemble identifier from the configuration number (default 'n').
122    replica: list
123        list of replica to be read, default is all
124    files: list
125        list of files to be read per replica, default is all.
126        for non-compact output format, hand the folders to be read here.
127    check_configs: list[list[int]]
128        list of lists of supposed configs, e.g. [range(1,1000)]
129        for one replicum with 1000 configs
130
131    Returns
132    -------
133    result: dict[list[Obs]]
134        dict with one of the following properties:
135        if keyed_out:
136            dict[key] = list[Obs]
137            where key has the form name/quarks/offset/wf/wf2
138        if not keyed_out:
139            dict[name][quarks][offset][wf][wf2] = list[Obs]
140    """
141
142    if kwargs.get('im'):
143        im = 1
144        part = 'imaginary'
145    else:
146        im = 0
147        part = 'real'
148
149    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
150
151    if version not in known_versions:
152        raise Exception("This version is not known!")
153    if (version[-1] == "c"):
154        appended = False
155        compact = True
156        version = version[:-1]
157    elif (version[-1] == "a"):
158        appended = True
159        compact = False
160        version = version[:-1]
161    else:
162        compact = False
163        appended = False
164    ls = []
165    if "replica" in kwargs:
166        ls = kwargs.get("replica")
167    else:
168        for (dirpath, dirnames, filenames) in os.walk(path):
169            if not appended:
170                ls.extend(dirnames)
171            else:
172                ls.extend(filenames)
173            break
174        if not ls:
175            raise Exception('Error, directory not found')
176        # Exclude folders with different names
177        for exc in ls:
178            if not fnmatch.fnmatch(exc, prefix + '*'):
179                ls = list(set(ls) - set([exc]))
180
181    if not appended:
182        ls = sort_names(ls)
183        replica = len(ls)
184
185    else:
186        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
187    if not silent:
188        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
189
190    if 'names' in kwargs:
191        new_names = kwargs.get('names')
192        if len(new_names) != len(set(new_names)):
193            raise Exception("names are not unique!")
194        if len(new_names) != replica:
195            raise Exception('names should have the length', replica)
196
197    else:
198        ens_name = kwargs.get("ens_name")
199        if not appended:
200            new_names = _get_rep_names(ls, ens_name)
201        else:
202            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name)
203        new_names = sort_names(new_names)
204
205    idl = []
206
207    noffset_list = [str(x) for x in noffset_list]
208    wf_list = [str(x) for x in wf_list]
209    wf2_list = [str(x) for x in wf2_list]
210
211    # setup dict structures
212    intern = {}
213    for name, corr_type in zip(name_list, corr_type_list):
214        intern[name] = {}
215        b2b, single = _extract_corr_type(corr_type)
216        intern[name]["b2b"] = b2b
217        intern[name]["single"] = single
218        intern[name]["spec"] = {}
219        for quarks in quarks_list:
220            intern[name]["spec"][quarks] = {}
221            for off in noffset_list:
222                intern[name]["spec"][quarks][off] = {}
223                for w in wf_list:
224                    intern[name]["spec"][quarks][off][w] = {}
225                    for w2 in wf2_list:
226                        intern[name]["spec"][quarks][off][w][w2] = {}
227                        intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
228
229    internal_ret_dict = {}
230    needed_keys = _lists2key(name_list, quarks_list, noffset_list, wf_list, wf2_list)
231    for key in needed_keys:
232        internal_ret_dict[key] = []
233
234    if not appended:
235        for i, item in enumerate(ls):
236            rep_path = path + '/' + item
237            if "files" in kwargs:
238                files = kwargs.get("files")
239            else:
240                files = []
241            sub_ls = _find_files(rep_path, prefix, compact, files)
242            rep_idl = []
243            no_cfg = len(sub_ls)
244            for cfg in sub_ls:
245                try:
246                    if compact:
247                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
248                    else:
249                        rep_idl.append(int(cfg[3:]))
250                except Exception:
251                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
252            rep_idl.sort()
253            # maybe there is a better way to print the idls
254            if not silent:
255                print(item, ':', no_cfg, ' configurations')
256            idl.append(rep_idl)
257            # here we have found all the files we need to look into.
258            if i == 0:
259                if version != "0.0" and compact:
260                    file = path + '/' + item + '/' + sub_ls[0]
261                for name in name_list:
262                    if version == "0.0" or not compact:
263                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
264                    for key in _lists2key(quarks_list, noffset_list, wf_list, wf2_list):
265                        specs = _key2specs(key)
266                        quarks = specs[0]
267                        off = specs[1]
268                        w = specs[2]
269                        w2 = specs[3]
270                        # here, we want to find the place within the file,
271                        # where the correlator we need is stored.
272                        # to do so, the pattern needed is put together
273                        # from the input values
274                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
275                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
276                        intern[name]["T"] = T
277                        # preparing the datastructure
278                        # the correlators get parsed into...
279                        deltas = []
280                        for j in range(intern[name]["T"]):
281                            deltas.append([])
282                        internal_ret_dict[sep.join([name, key])] = deltas
283
284            if compact:
285                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
286                for key in needed_keys:
287                    name = _key2specs(key)[0]
288                    for t in range(intern[name]["T"]):
289                        internal_ret_dict[key][t].append(rep_deltas[key][t])
290            else:
291                for key in needed_keys:
292                    rep_data = []
293                    name = _key2specs(key)[0]
294                    for subitem in sub_ls:
295                        cfg_path = path + '/' + item + '/' + subitem
296                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
297                        rep_data.append(file_data)
298                    print(rep_data)
299                    for t in range(intern[name]["T"]):
300                        internal_ret_dict[key][t].append([])
301                        for cfg in range(no_cfg):
302                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
303    else:
304        for key in needed_keys:
305            specs = _key2specs(key)
306            name = specs[0]
307            quarks = specs[1]
308            off = specs[2]
309            w = specs[3]
310            w2 = specs[4]
311            if "files" in kwargs:
312                name_ls = kwargs.get("files")
313            else:
314                name_ls = ls
315                for exc in name_ls:
316                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
317                        name_ls = list(set(name_ls) - set([exc]))
318            name_ls = sort_names(name_ls)
319            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
320            deltas = []
321            for rep, file in enumerate(name_ls):
322                rep_idl = []
323                filename = path + '/' + file
324                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
325                if rep == 0:
326                    intern[name]['T'] = T
327                    for t in range(intern[name]['T']):
328                        deltas.append([])
329                for t in range(intern[name]['T']):
330                    deltas[t].append(rep_data[t])
331                internal_ret_dict[key] = deltas
332                if name == name_list[0]:
333                    idl.append(rep_idl)
334
335    if kwargs.get("check_configs") is not None:
336        if not silent:
337            print("Checking for missing configs...")
338        che = kwargs.get("check_configs")
339        if not (len(che) == len(idl)):
340            raise Exception("check_configs has to be the same length as replica!")
341        for r in range(len(idl)):
342            if not silent:
343                print("checking " + new_names[r])
344            check_idl(idl[r], che[r])
345        if not silent:
346            print("Done")
347
348    result_dict = {}
349    if keyed_out:
350        for key in needed_keys:
351            result = []
352            for t in range(len(internal_ret_dict[key])):
353                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
354            result_dict[key] = result
355    else:
356        for name in name_list:
357            result_dict[name] = {}
358            for quarks in quarks_list:
359                result_dict[name][quarks] = {}
360                for off in noffset_list:
361                    result_dict[name][quarks][off] = {}
362                    for w in wf_list:
363                        result_dict[name][quarks][off][w] = {}
364                        for w2 in wf2_list:
365                            key = _specs2key(name, quarks, off, w, w2)
366                            result = []
367                            for t in range(intern[name]["T"]):
368                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
369                            result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
370    return result_dict
371
372
373def _lists2key(*lists):
374    keys = []
375    for tup in itertools.product(*lists):
376        keys.append(sep.join(tup))
377    return keys
378
379
380def _key2specs(key):
381    return key.split(sep)
382
383
384def _specs2key(*specs):
385    return sep.join(specs)
386
387
388def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
389    return_vals = {}
390    for key in needed_keys:
391        file = cfg_path + '/' + name
392        specs = _key2specs(key)
393        if specs[0] == name:
394            with open(file) as fp:
395                lines = fp.readlines()
396                quarks = specs[1]
397                off = specs[2]
398                w = specs[3]
399                w2 = specs[4]
400                T = intern[name]["T"]
401                start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
402                deltas = []
403                for line in lines[start_read:start_read + T]:
404                    floats = list(map(float, line.split()))
405                    if version == "0.0":
406                        deltas.append(floats[im - intern[name]["single"]])
407                    else:
408                        deltas.append(floats[1 + im - intern[name]["single"]])
409                return_vals[key] = deltas
410    return return_vals
411
412
413def _extract_corr_type(corr_type):
414    if corr_type == 'bb':
415        b2b = True
416        single = True
417    elif corr_type == 'bib':
418        b2b = True
419        single = False
420    else:
421        b2b = False
422        single = False
423    return b2b, single
424
425
426def _find_files(rep_path, prefix, compact, files=[]):
427    sub_ls = []
428    if not files == []:
429        files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
430    else:
431        for (dirpath, dirnames, filenames) in os.walk(rep_path):
432            if compact:
433                sub_ls.extend(filenames)
434            else:
435                sub_ls.extend(dirnames)
436            break
437        if compact:
438            for exc in sub_ls:
439                if not fnmatch.fnmatch(exc, prefix + '*'):
440                    sub_ls = list(set(sub_ls) - set([exc]))
441            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
442        else:
443            for exc in sub_ls:
444                if not fnmatch.fnmatch(exc, 'cfg*'):
445                    sub_ls = list(set(sub_ls) - set([exc]))
446            sub_ls.sort(key=lambda x: int(x[3:]))
447        files = sub_ls
448    if len(files) == 0:
449        raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.")
450    return files
451
452
453def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
454    if version == "0.0":
455        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
456        if b2b:
457            pattern += ", wf_2 " + str(wf2)
458        qs = quarks.split(" ")
459        pattern += " : " + qs[0] + " - " + qs[1]
460    else:
461        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
462        if b2b:
463            pattern += '\nwf_2      ' + str(wf2)
464    return pattern
465
466
467def _find_correlator(file_name, version, pattern, b2b, silent=False):
468    T = 0
469
470    with open(file_name, "r") as my_file:
471
472        content = my_file.read()
473        match = re.search(pattern, content)
474        if match:
475            if version == "0.0":
476                start_read = content.count('\n', 0, match.start()) + 1
477                T = content.count('\n', start_read)
478            else:
479                start_read = content.count('\n', 0, match.start()) + 5 + b2b
480                end_match = re.search(r'\n\s*\n', content[match.start():])
481                T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
482            if not T > 0:
483                raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
484            if not silent:
485                print(T, 'entries, starting to read in line', start_read)
486
487        else:
488            raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')
489
490    return start_read, T
491
492
493def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
494    return_vals = {}
495    with open(rep_path + cfg_file) as fp:
496        lines = fp.readlines()
497        for key in needed_keys:
498            keys = _key2specs(key)
499            name = keys[0]
500            quarks = keys[1]
501            off = keys[2]
502            w = keys[3]
503            w2 = keys[4]
504
505            T = intern[name]["T"]
506            start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
507            # check, if the correlator is in fact
508            # printed completely
509            if (start_read + T + 1 > len(lines)):
510                raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
511            corr_lines = lines[start_read - 6: start_read + T]
512            t_vals = []
513
514            if corr_lines[1 - intern[name]["b2b"]].strip() != 'name      ' + name:
515                raise Exception('Wrong format in file', cfg_file)
516
517            for k in range(6, T + 6):
518                floats = list(map(float, corr_lines[k].split()))
519                t_vals.append(floats[-2:][im])
520            return_vals[key] = t_vals
521    return return_vals
522
523
524def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
525    rep_path = path + '/' + rep + '/'
526    no_cfg = len(sub_ls)
527
528    return_vals = {}
529    for key in needed_keys:
530        name = _key2specs(key)[0]
531        deltas = []
532        for t in range(intern[name]["T"]):
533            deltas.append(np.zeros(no_cfg))
534        return_vals[key] = deltas
535
536    for cfg in range(no_cfg):
537        cfg_file = sub_ls[cfg]
538        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
539        for key in needed_keys:
540            name = _key2specs(key)[0]
541            for t in range(intern[name]["T"]):
542                return_vals[key][t][cfg] = cfg_data[key][t]
543    return return_vals
544
545
546def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
547    try:
548        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
549    except Exception:
550        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
551
552    found_pat = ""
553    data = []
554    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
555        found_pat += li
556    if re.search(pattern, found_pat):
557        for t, line in enumerate(chunk[start_read:start_read + T]):
558            floats = list(map(float, line.split()))
559            data.append(floats[im + 1 - single])
560    return idl, data
561
562
563def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
564    with open(filename, 'r') as fp:
565        content = fp.readlines()
566        data_starts = []
567        for linenumber, line in enumerate(content):
568            if "[run]" in line:
569                data_starts.append(linenumber)
570        if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
571            raise Exception("Irregularities in file structure found, not all runs have the same output length")
572        chunk = content[:data_starts[1]]
573        for linenumber, line in enumerate(chunk):
574            if line.startswith("gauge_name"):
575                gauge_line = linenumber
576            elif line.startswith("[correlator]"):
577                corr_line = linenumber
578                found_pat = ""
579                for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
580                    found_pat += li
581                if re.search(pattern, found_pat):
582                    start_read = corr_line + 7 + b2b
583                    break
584                else:
585                    raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
586        endline = corr_line + 6 + b2b
587        while not chunk[endline] == "\n":
588            endline += 1
589        T = endline - start_read
590
591        # all other chunks should follow the same structure
592        rep_idl = []
593        rep_data = []
594
595        for cnfg in range(len(data_starts)):
596            start = data_starts[cnfg]
597            stop = start + data_starts[1]
598            chunk = content[start:stop]
599            idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
600            rep_idl.append(idl)
601            rep_data.append(data)
602
603        data = []
604
605        for t in range(T):
606            data.append([])
607            for c in range(len(rep_data)):
608                data[t].append(rep_data[c][t])
609        return T, rep_idl, data
610
611
612def _get_rep_names(ls, ens_name=None):
613    new_names = []
614    for entry in ls:
615        try:
616            idx = entry.index('r')
617        except Exception:
618            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
619
620        if ens_name:
621            new_names.append(ens_name + '|' + entry[idx:])
622        else:
623            new_names.append(entry[:idx] + '|' + entry[idx:])
624    return new_names
625
626
627def _get_appended_rep_names(ls, prefix, name, ens_name=None):
628    new_names = []
629    for exc in ls:
630        if not fnmatch.fnmatch(exc, prefix + '*.' + name):
631            ls = list(set(ls) - set([exc]))
632    ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
633    for entry in ls:
634        myentry = entry[:-len(name) - 1]
635        try:
636            idx = myentry.index('r')
637        except Exception:
638            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
639
640        if ens_name:
641            new_names.append(ens_name + '|' + myentry[idx:])
642        else:
643            new_names.append(myentry[:idx] + '|' + myentry[idx:])
644    return new_names
sep = '/'
def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is added automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of lists of supposed configs, e.g. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (list[Obs]): list of Observables with length T, one observable per timeslice. bb-type correlators have length 1.
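
A minimal usage sketch; the path "./data", prefix "sf_prefix", correlator name "f_A" and quark label "lquark lquark" are hypothetical placeholders, assuming a measurement written with the compact output option (version "1.0c"):

    import pyerrors.input.sfcf as sfin

    # read the real part of the boundary-to-inner correlator "f_A";
    # directory layout and labels are assumptions, not prescribed by the module
    f_A = sfin.read_sfcf("./data", "sf_prefix", "f_A",
                         quarks="lquark lquark", corr_type="bi",
                         noffset=0, wf=0, version="1.0c")
    print(len(f_A))  # T observables, one per timeslice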
def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name_list (list[str]): Names of the correlation functions to read.
  • quarks_list (list[str]): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; this is added automatically for this version.
  • corr_type_list (list[str]): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
  • wf_list (list[int]): IDs of the wave functions
  • wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of lists of supposed configs, e.g. [range(1,1000)] for one replicum with 1000 configs
Returns
  • result (dict[list[Obs]]): dict with one of the following properties:
    • if keyed_out: dict[key] = list[Obs], where key has the form name/quarks/offset/wf/wf2
    • if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]
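
A minimal sketch of reading several correlators in one pass, assuming the same hypothetical path, prefix and labels as above. With the default return structure the result is indexed by name, quarks, offset, wave function and second wave function (offsets and wave-function IDs as strings); with keyed_out=True the same data is returned under flat keys of the form name/quarks/offset/wf/wf2:

    import pyerrors.input.sfcf as sfin

    # nested return structure: res[name][quarks][offset][wf][wf2] -> list[Obs]
    res = sfin.read_sfcf_multi("./data", "sf_prefix", ["f_A", "f_P"],
                               quarks_list=["lquark lquark"],
                               corr_type_list=["bi", "bi"], version="1.0c")
    f_A = res["f_A"]["lquark lquark"]["0"]["0"]["0"]

    # flat return structure, keyed by "name/quarks/offset/wf/wf2"
    flat = sfin.read_sfcf_multi("./data", "sf_prefix", ["f_A", "f_P"],
                                quarks_list=["lquark lquark"],
                                corr_type_list=["bi", "bi"], version="1.0c",
                                keyed_out=True)
    f_P = flat["f_P/lquark lquark/0/0/0"]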