pyerrors.input.sfcf

  1import os
  2import fnmatch
  3import re
  4import numpy as np  # Thinly-wrapped numpy
  5from ..obs import Obs
  6from .utils import sort_names, check_idl
  7import itertools
  8
  9
 10sep = "/"
 11
 12
 13def read_sfcf(path, prefix, name, quarks='.*', corr_type="bi", noffset=0, wf=0, wf2=0, version="1.0c", cfg_separator="n", silent=False, **kwargs):
 14    """Read sfcf files from given folder structure.
 15
 16    Parameters
 17    ----------
 18    path : str
 19        Path to the sfcf files.
 20    prefix : str
 21        Prefix of the sfcf files.
 22    name : str
 23        Name of the correlation function to read.
 24    quarks : str
 25        Label of the quarks used in the sfcf input file, e.g. "quark quark".
 26        For version 0.0 this does NOT need to be given with the typical " - "
 27        that is present in the output file;
 28        this is added automatically for this version.
 29    corr_type : str
 30        Type of correlation function to read. Can be
 31        - 'bi' for boundary-inner
 32        - 'bb' for boundary-boundary
 33        - 'bib' for boundary-inner-boundary
 34    noffset : int
 35        Offset of the source (only relevant when wavefunctions are used)
 36    wf : int
 37        ID of wave function
 38    wf2 : int
 39        ID of the second wavefunction
 40        (only relevant for boundary-to-boundary correlation functions)
 41    im : bool
 42        if True, read imaginary instead of real part
 43        of the correlation function.
 44    names : list
 45        Alternative labeling for replicas/ensembles.
 46        Has to have the appropriate length
 47    ens_name : str
 48        replaces the name of the ensemble
 49    version: str
 50        version of SFCF with which the measurement was done.
 51        If the compact output option (-c) was specified,
 52        append a "c" to the version (e.g. "1.0c");
 53        if the append output option (-a) was specified,
 54        append an "a" to the version.
 55    cfg_separator : str
 56        String that separates the ensemble identifier from the configuration number (default 'n').
 57    replica: list
 58        list of replica to be read, default is all
 59    files: list
 60        list of files to be read per replica, default is all.
 61        for non-compact output format, hand the folders to be read here.
 62    check_configs: list[list[int]]
 63        list of lists of expected configs, e.g. [range(1, 1001)]
 64        for one replicum with 1000 configs
 65
 66    Returns
 67    -------
 68    result: list[Obs]
 69        list of Observables with length T, observable per timeslice.
 70        bb-type correlators have length 1.
 71    """
 72    ret = read_sfcf_multi(path, prefix, [name], quarks_list=[quarks], corr_type_list=[corr_type],
 73                          noffset_list=[noffset], wf_list=[wf], wf2_list=[wf2], version=version,
 74                          cfg_separator=cfg_separator, silent=silent, **kwargs)
 75    return ret[name][quarks][str(noffset)][str(wf)][str(wf2)]
 76
 77
 78def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version="1.0c", cfg_separator="n", silent=False, keyed_out=False, **kwargs):
 79    """Read sfcf files from given folder structure.
 80
 81    Parameters
 82    ----------
 83    path : str
 84        Path to the sfcf files.
 85    prefix : str
 86        Prefix of the sfcf files.
 87    name_list : list[str]
 88        Names of the correlation functions to read.
 89    quarks_list : list[str]
 90        Label of the quarks used in the sfcf input file, e.g. "quark quark".
 91        For version 0.0 this does NOT need to be given with the typical " - "
 92        that is present in the output file;
 93        this is added automatically for this version.
 94    corr_type_list : list[str]
 95        Type of correlation function to read. Can be
 96        - 'bi' for boundary-inner
 97        - 'bb' for boundary-boundary
 98        - 'bib' for boundary-inner-boundary
 99    noffset_list : list[int]
100        Offset of the source (only relevant when wavefunctions are used)
101    wf_list : list[int]
102        IDs of the wave functions
103    wf2_list : list[int]
104        ID of the second wavefunction
105        (only relevant for boundary-to-boundary correlation functions)
106    im : bool
107        if True, read imaginary instead of real part
108        of the correlation function.
109    names : list
110        Alternative labeling for replicas/ensembles.
111        Has to have the appropriate length
112    ens_name : str
113        replaces the name of the ensemble
114    version: str
115        version of SFCF with which the measurement was done.
116        If the compact output option (-c) was specified,
117        append a "c" to the version (e.g. "1.0c");
118        if the append output option (-a) was specified,
119        append an "a" to the version.
120    cfg_separator : str
121        String that separates the ensemble identifier from the configuration number (default 'n').
122    replica: list
123        list of replica to be read, default is all
124    files: list[list[str]]
125        list of files to be read per replica, default is all.
126        for non-compact output format, hand the folders to be read here.
127    check_configs: list[list[int]]
128        list of lists of expected configs, e.g. [range(1, 1001)]
129        for one replicum with 1000 configs
130
131    Returns
132    -------
133    result: dict[list[Obs]]
134        dict with one of the following properties:
135        if keyed_out:
136            dict[key] = list[Obs]
137            where key has the form name/quarks/offset/wf/wf2
138        if not keyed_out:
139            dict[name][quarks][offset][wf][wf2] = list[Obs]
140    """
141
142    if kwargs.get('im'):
143        im = 1
144        part = 'imaginary'
145    else:
146        im = 0
147        part = 'real'
148
149    known_versions = ["0.0", "1.0", "2.0", "1.0c", "2.0c", "1.0a", "2.0a"]
150
151    if version not in known_versions:
152        raise Exception("This version is not known!")
153    if (version[-1] == "c"):
154        appended = False
155        compact = True
156        version = version[:-1]
157    elif (version[-1] == "a"):
158        appended = True
159        compact = False
160        version = version[:-1]
161    else:
162        compact = False
163        appended = False
164    ls = []
165    if "replica" in kwargs:
166        ls = kwargs.get("replica")
167    else:
168        for (dirpath, dirnames, filenames) in os.walk(path):
169            if not appended:
170                ls.extend(dirnames)
171            else:
172                ls.extend(filenames)
173            break
174        if not ls:
175            raise Exception('Error, directory not found')
176        # Exclude folders with different names
177        for exc in ls:
178            if not fnmatch.fnmatch(exc, prefix + '*'):
179                ls = list(set(ls) - set([exc]))
180
181    if not appended:
182        ls = sort_names(ls)
183        replica = len(ls)
184
185    else:
186        replica = len([file.split(".")[-1] for file in ls]) // len(set([file.split(".")[-1] for file in ls]))
187    if replica == 0:
188        raise Exception('No replica found in directory')
189    if not silent:
190        print('Read', part, 'part of', name_list, 'from', prefix[:-1], ',', replica, 'replica')
191
192    if 'names' in kwargs:
193        new_names = kwargs.get('names')
194        if len(new_names) != len(set(new_names)):
195            raise Exception("names are not unique!")
196        if len(new_names) != replica:
197            raise Exception('names should have the length', replica)
198
199    else:
200        ens_name = kwargs.get("ens_name")
201        if not appended:
202            new_names = _get_rep_names(ls, ens_name)
203        else:
204            new_names = _get_appended_rep_names(ls, prefix, name_list[0], ens_name)
205        new_names = sort_names(new_names)
206
207    idl = []
208
209    noffset_list = [str(x) for x in noffset_list]
210    wf_list = [str(x) for x in wf_list]
211    wf2_list = [str(x) for x in wf2_list]
212
213    # setup dict structures
214    intern = {}
215    for name, corr_type in zip(name_list, corr_type_list):
216        intern[name] = {}
217        b2b, single = _extract_corr_type(corr_type)
218        intern[name]["b2b"] = b2b
219        intern[name]["single"] = single
220        intern[name]["spec"] = {}
221        for quarks in quarks_list:
222            intern[name]["spec"][quarks] = {}
223            for off in noffset_list:
224                intern[name]["spec"][quarks][off] = {}
225                for w in wf_list:
226                    intern[name]["spec"][quarks][off][w] = {}
227                    if b2b:
228                        for w2 in wf2_list:
229                            intern[name]["spec"][quarks][off][w][w2] = {}
230                            intern[name]["spec"][quarks][off][w][w2]["pattern"] = _make_pattern(version, name, off, w, w2, intern[name]['b2b'], quarks)
231                    else:
232                        intern[name]["spec"][quarks][off][w]["0"] = {}
233                        intern[name]["spec"][quarks][off][w]["0"]["pattern"] = _make_pattern(version, name, off, w, 0, intern[name]['b2b'], quarks)
234
235    internal_ret_dict = {}
236    needed_keys = []
237    for name, corr_type in zip(name_list, corr_type_list):
238        b2b, single = _extract_corr_type(corr_type)
239        if b2b:
240            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, wf2_list))
241        else:
242            needed_keys.extend(_lists2key([name], quarks_list, noffset_list, wf_list, ["0"]))
243
244    for key in needed_keys:
245        internal_ret_dict[key] = []
246
247    if not appended:
248        for i, item in enumerate(ls):
249            rep_path = path + '/' + item
250            if "files" in kwargs:
251                files = kwargs.get("files")
252                if isinstance(files, list):
253                    if all(isinstance(f, list) for f in files):
254                        files = files[i]
255                    elif all(isinstance(f, str) for f in files):
256                        files = files
257                    else:
258                        raise TypeError("files has to be of type list[list[str]] or list[str]!")
259                else:
260                    raise TypeError("files has to be of type list[list[str]] or list[str]!")
261
262            else:
263                files = []
264            sub_ls = _find_files(rep_path, prefix, compact, files)
265            rep_idl = []
266            no_cfg = len(sub_ls)
267            for cfg in sub_ls:
268                try:
269                    if compact:
270                        rep_idl.append(int(cfg.split(cfg_separator)[-1]))
271                    else:
272                        rep_idl.append(int(cfg[3:]))
273                except Exception:
274                    raise Exception("Couldn't parse idl from directory, problem with file " + cfg)
275            rep_idl.sort()
276            # maybe there is a better way to print the idls
277            if not silent:
278                print(item, ':', no_cfg, ' configurations')
279            idl.append(rep_idl)
280            # here we have found all the files we need to look into.
281            if i == 0:
282                if version != "0.0" and compact:
283                    file = path + '/' + item + '/' + sub_ls[0]
284                for name_index, name in enumerate(name_list):
285                    if version == "0.0" or not compact:
286                        file = path + '/' + item + '/' + sub_ls[0] + '/' + name
287                    if corr_type_list[name_index] == 'bi':
288                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, ["0"])
289                    else:
290                        name_keys = _lists2key(quarks_list, noffset_list, wf_list, wf2_list)
291                    for key in name_keys:
292                        specs = _key2specs(key)
293                        quarks = specs[0]
294                        off = specs[1]
295                        w = specs[2]
296                        w2 = specs[3]
297                        # here, we want to find the place within the file,
298                        # where the correlator we need is stored.
299                        # to do so, the pattern needed is put together
300                        # from the input values
301                        start_read, T = _find_correlator(file, version, intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["pattern"], intern[name]['b2b'], silent=silent)
302                        intern[name]["spec"][quarks][str(off)][str(w)][str(w2)]["start"] = start_read
303                        intern[name]["T"] = T
304                        # preparing the datastructure
305                        # the correlators get parsed into...
306                        deltas = []
307                        for j in range(intern[name]["T"]):
308                            deltas.append([])
309                        internal_ret_dict[sep.join([name, key])] = deltas
310
311            if compact:
312                rep_deltas = _read_compact_rep(path, item, sub_ls, intern, needed_keys, im)
313                for key in needed_keys:
314                    name = _key2specs(key)[0]
315                    for t in range(intern[name]["T"]):
316                        internal_ret_dict[key][t].append(rep_deltas[key][t])
317            else:
318                for key in needed_keys:
319                    rep_data = []
320                    name = _key2specs(key)[0]
321                    for subitem in sub_ls:
322                        cfg_path = path + '/' + item + '/' + subitem
323                        file_data = _read_o_file(cfg_path, name, needed_keys, intern, version, im)
324                        rep_data.append(file_data)
325                    for t in range(intern[name]["T"]):
326                        internal_ret_dict[key][t].append([])
327                        for cfg in range(no_cfg):
328                            internal_ret_dict[key][t][i].append(rep_data[cfg][key][t])
329    else:
330        for key in needed_keys:
331            specs = _key2specs(key)
332            name = specs[0]
333            quarks = specs[1]
334            off = specs[2]
335            w = specs[3]
336            w2 = specs[4]
337            if "files" in kwargs:
338                if isinstance(kwargs.get("files"), list) and all(isinstance(f, str) for f in kwargs.get("files")):
339                    name_ls = kwargs.get("files")
340                else:
341                    raise TypeError("In append mode, files has to be of type list[str]!")
342            else:
343                name_ls = ls
344                for exc in name_ls:
345                    if not fnmatch.fnmatch(exc, prefix + '*.' + name):
346                        name_ls = list(set(name_ls) - set([exc]))
347            name_ls = sort_names(name_ls)
348            pattern = intern[name]['spec'][quarks][off][w][w2]['pattern']
349            deltas = []
350            for rep, file in enumerate(name_ls):
351                rep_idl = []
352                filename = path + '/' + file
353                T, rep_idl, rep_data = _read_append_rep(filename, pattern, intern[name]['b2b'], cfg_separator, im, intern[name]['single'])
354                if rep == 0:
355                    intern[name]['T'] = T
356                    for t in range(intern[name]['T']):
357                        deltas.append([])
358                for t in range(intern[name]['T']):
359                    deltas[t].append(rep_data[t])
360                internal_ret_dict[key] = deltas
361                if name == name_list[0]:
362                    idl.append(rep_idl)
363
364    if kwargs.get("check_configs") is not None:
365        if not silent:
366            print("Checking for missing configs...")
367        che = kwargs.get("check_configs")
368        if not (len(che) == len(idl)):
369            raise Exception("check_configs has to be the same length as replica!")
370        for r in range(len(idl)):
371            if not silent:
372                print("checking " + new_names[r])
373            check_idl(idl[r], che[r])
374        if not silent:
375            print("Done")
376
377    result_dict = {}
378    if keyed_out:
379        for key in needed_keys:
380            name = _key2specs(key)[0]
381            result = []
382            for t in range(intern[name]["T"]):
383                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
384            result_dict[key] = result
385    else:
386        for name, corr_type in zip(name_list, corr_type_list):
387            result_dict[name] = {}
388            for quarks in quarks_list:
389                result_dict[name][quarks] = {}
390                for off in noffset_list:
391                    result_dict[name][quarks][off] = {}
392                    for w in wf_list:
393                        result_dict[name][quarks][off][w] = {}
394                        if corr_type != 'bi':
395                            for w2 in wf2_list:
396                                key = _specs2key(name, quarks, off, w, w2)
397                                result = []
398                                for t in range(intern[name]["T"]):
399                                    result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
400                                result_dict[name][quarks][str(off)][str(w)][str(w2)] = result
401                        else:
402                            key = _specs2key(name, quarks, off, w, "0")
403                            result = []
404                            for t in range(intern[name]["T"]):
405                                result.append(Obs(internal_ret_dict[key][t], new_names, idl=idl))
406                            result_dict[name][quarks][str(off)][str(w)][str(0)] = result
407    return result_dict
408
409
410def _lists2key(*lists):
411    keys = []
412    for tup in itertools.product(*lists):
413        keys.append(sep.join(tup))
414    return keys
415
416
417def _key2specs(key):
418    return key.split(sep)
419
420
421def _specs2key(*specs):
422    return sep.join(specs)
423
424
425def _read_o_file(cfg_path, name, needed_keys, intern, version, im):
426    return_vals = {}
427    for key in needed_keys:
428        file = cfg_path + '/' + name
429        specs = _key2specs(key)
430        if specs[0] == name:
431            with open(file) as fp:
432                lines = fp.readlines()
433                quarks = specs[1]
434                off = specs[2]
435                w = specs[3]
436                w2 = specs[4]
437                T = intern[name]["T"]
438                start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
439                deltas = []
440                for line in lines[start_read:start_read + T]:
441                    floats = list(map(float, line.split()))
442                    if version == "0.0":
443                        deltas.append(floats[im - intern[name]["single"]])
444                    else:
445                        deltas.append(floats[1 + im - intern[name]["single"]])
446                return_vals[key] = deltas
447    return return_vals
448
449
450def _extract_corr_type(corr_type):
451    if corr_type == 'bb':
452        b2b = True
453        single = True
454    elif corr_type == 'bib':
455        b2b = True
456        single = False
457    else:
458        b2b = False
459        single = False
460    return b2b, single
461
462
463def _find_files(rep_path, prefix, compact, files=[]):
464    sub_ls = []
465    if not files == []:
466        files.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
467    else:
468        for (dirpath, dirnames, filenames) in os.walk(rep_path):
469            if compact:
470                sub_ls.extend(filenames)
471            else:
472                sub_ls.extend(dirnames)
473            break
474        if compact:
475            for exc in sub_ls:
476                if not fnmatch.fnmatch(exc, prefix + '*'):
477                    sub_ls = list(set(sub_ls) - set([exc]))
478            sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
479        else:
480            for exc in sub_ls:
481                if not fnmatch.fnmatch(exc, 'cfg*'):
482                    sub_ls = list(set(sub_ls) - set([exc]))
483            sub_ls.sort(key=lambda x: int(x[3:]))
484        files = sub_ls
485    if len(files) == 0:
486        raise FileNotFoundError("Did not find files in", rep_path, "with prefix", prefix, "and the given structure.")
487    return files
488
489
490def _make_pattern(version, name, noffset, wf, wf2, b2b, quarks):
491    if version == "0.0":
492        pattern = "# " + name + " : offset " + str(noffset) + ", wf " + str(wf)
493        if b2b:
494            pattern += ", wf_2 " + str(wf2)
495        qs = quarks.split(" ")
496        pattern += " : " + qs[0] + " - " + qs[1]
497    else:
498        pattern = 'name      ' + name + '\nquarks    ' + quarks + '\noffset    ' + str(noffset) + '\nwf        ' + str(wf)
499        if b2b:
500            pattern += '\nwf_2      ' + str(wf2)
501    return pattern
502
503
504def _find_correlator(file_name, version, pattern, b2b, silent=False):
505    T = 0
506
507    with open(file_name, "r") as my_file:
508
509        content = my_file.read()
510        match = re.search(pattern, content)
511        if match:
512            if version == "0.0":
513                start_read = content.count('\n', 0, match.start()) + 1
514                T = content.count('\n', start_read)
515            else:
516                start_read = content.count('\n', 0, match.start()) + 5 + b2b
517                end_match = re.search(r'\n\s*\n', content[match.start():])
518                T = content[match.start():].count('\n', 0, end_match.start()) - 4 - b2b
519            if not T > 0:
520                raise ValueError("Correlator with pattern\n" + pattern + "\nis empty!")
521            if not silent:
522                print(T, 'entries, starting to read in line', start_read)
523
524        else:
525            raise ValueError('Correlator with pattern\n' + pattern + '\nnot found.')
526
527    return start_read, T
528
529
530def _read_compact_file(rep_path, cfg_file, intern, needed_keys, im):
531    return_vals = {}
532    with open(rep_path + cfg_file) as fp:
533        lines = fp.readlines()
534        for key in needed_keys:
535            keys = _key2specs(key)
536            name = keys[0]
537            quarks = keys[1]
538            off = keys[2]
539            w = keys[3]
540            w2 = keys[4]
541
542            T = intern[name]["T"]
543            start_read = intern[name]["spec"][quarks][off][w][w2]["start"]
544            # check, if the correlator is in fact
545            # printed completely
546            if (start_read + T + 1 > len(lines)):
547                raise Exception("EOF before end of correlator data! Maybe " + rep_path + cfg_file + " is corrupted?")
548            corr_lines = lines[start_read - 6: start_read + T]
549            t_vals = []
550
551            if corr_lines[1 - intern[name]["b2b"]].strip() != 'name      ' + name:
552                raise Exception('Wrong format in file', cfg_file)
553
554            for k in range(6, T + 6):
555                floats = list(map(float, corr_lines[k].split()))
556                t_vals.append(floats[-2:][im])
557            return_vals[key] = t_vals
558    return return_vals
559
560
561def _read_compact_rep(path, rep, sub_ls, intern, needed_keys, im):
562    rep_path = path + '/' + rep + '/'
563    no_cfg = len(sub_ls)
564
565    return_vals = {}
566    for key in needed_keys:
567        name = _key2specs(key)[0]
568        deltas = []
569        for t in range(intern[name]["T"]):
570            deltas.append(np.zeros(no_cfg))
571        return_vals[key] = deltas
572
573    for cfg in range(no_cfg):
574        cfg_file = sub_ls[cfg]
575        cfg_data = _read_compact_file(rep_path, cfg_file, intern, needed_keys, im)
576        for key in needed_keys:
577            name = _key2specs(key)[0]
578            for t in range(intern[name]["T"]):
579                return_vals[key][t][cfg] = cfg_data[key][t]
580    return return_vals
581
582
583def _read_chunk(chunk, gauge_line, cfg_sep, start_read, T, corr_line, b2b, pattern, im, single):
584    try:
585        idl = int(chunk[gauge_line].split(cfg_sep)[-1])
586    except Exception:
587        raise Exception("Couldn't parse idl from directory, problem with chunk around line ", gauge_line)
588
589    found_pat = ""
590    data = []
591    for li in chunk[corr_line + 1:corr_line + 6 + b2b]:
592        found_pat += li
593    if re.search(pattern, found_pat):
594        for t, line in enumerate(chunk[start_read:start_read + T]):
595            floats = list(map(float, line.split()))
596            data.append(floats[im + 1 - single])
597    return idl, data
598
599
600def _read_append_rep(filename, pattern, b2b, cfg_separator, im, single):
601    with open(filename, 'r') as fp:
602        content = fp.readlines()
603        data_starts = []
604        for linenumber, line in enumerate(content):
605            if "[run]" in line:
606                data_starts.append(linenumber)
607        if len(set([data_starts[i] - data_starts[i - 1] for i in range(1, len(data_starts))])) > 1:
608            raise Exception("Irregularities in file structure found, not all runs have the same output length")
609        chunk = content[:data_starts[1]]
610        for linenumber, line in enumerate(chunk):
611            if line.startswith("gauge_name"):
612                gauge_line = linenumber
613            elif line.startswith("[correlator]"):
614                corr_line = linenumber
615                found_pat = ""
616                for li in chunk[corr_line + 1: corr_line + 6 + b2b]:
617                    found_pat += li
618                if re.search(pattern, found_pat):
619                    start_read = corr_line + 7 + b2b
620                    break
621                else:
622                    raise ValueError("Did not find pattern\n", pattern, "\nin\n", filename)
623        endline = corr_line + 6 + b2b
624        while not chunk[endline] == "\n":
625            endline += 1
626        T = endline - start_read
627
628        # all other chunks should follow the same structure
629        rep_idl = []
630        rep_data = []
631
632        for cnfg in range(len(data_starts)):
633            start = data_starts[cnfg]
634            stop = start + data_starts[1]
635            chunk = content[start:stop]
636            idl, data = _read_chunk(chunk, gauge_line, cfg_separator, start_read, T, corr_line, b2b, pattern, im, single)
637            rep_idl.append(idl)
638            rep_data.append(data)
639
640        data = []
641
642        for t in range(T):
643            data.append([])
644            for c in range(len(rep_data)):
645                data[t].append(rep_data[c][t])
646        return T, rep_idl, data
647
648
649def _get_rep_names(ls, ens_name=None):
650    new_names = []
651    for entry in ls:
652        try:
653            idx = entry.index('r')
654        except Exception:
655            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
656
657        if ens_name:
658            new_names.append(ens_name + '|' + entry[idx:])
659        else:
660            new_names.append(entry[:idx] + '|' + entry[idx:])
661    return new_names
662
663
664def _get_appended_rep_names(ls, prefix, name, ens_name=None):
665    new_names = []
666    for exc in ls:
667        if not fnmatch.fnmatch(exc, prefix + '*.' + name):
668            ls = list(set(ls) - set([exc]))
669    ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
670    for entry in ls:
671        myentry = entry[:-len(name) - 1]
672        try:
673            idx = myentry.index('r')
674        except Exception:
675            raise Exception("Automatic recognition of replicum failed, please enter the key word 'names'.")
676
677        if ens_name:
678            new_names.append(ens_name + '|' + entry[idx:])
679        else:
680            new_names.append(myentry[:idx] + '|' + myentry[idx:])
681    return new_names
sep = '/'
def read_sfcf(path, prefix, name, quarks='.*', corr_type='bi', noffset=0, wf=0, wf2=0, version='1.0c', cfg_separator='n', silent=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name (str): Name of the correlation function to read.
  • quarks (str): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; it is added automatically for this version.
  • corr_type (str): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset (int): Offset of the source (only relevant when wavefunctions are used)
  • wf (int): ID of wave function
  • wf2 (int): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list): list of files to be read per replica, default is all. for non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of lists of expected configs, e.g. [range(1, 1001)] for one replicum with 1000 configs
Returns
  • result (list[Obs]): list of Observables with length T, observable per timeslice. bb-type correlators have length 1.
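
A minimal usage sketch; the path ./data, prefix data_sf, correlator name f_A and the quark labels are made-up placeholders for a hypothetical compact-format (version "2.0c") measurement, not values prescribed by pyerrors:

    from pyerrors.input import sfcf

    # Placeholder arguments: adjust path, prefix, correlator name, quark
    # labels and version to the actual SFCF measurement.
    f_A = sfcf.read_sfcf("./data", "data_sf", "f_A",
                         quarks="lquark lquark", noffset=0, wf=0,
                         version="2.0c", silent=True)

    # One Obs per timeslice is returned.
    print(len(f_A))  # T
    print(f_A[1])    # observable at timeslice 1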
def read_sfcf_multi(path, prefix, name_list, quarks_list=['.*'], corr_type_list=['bi'], noffset_list=[0], wf_list=[0], wf2_list=[0], version='1.0c', cfg_separator='n', silent=False, keyed_out=False, **kwargs):

Read sfcf files from given folder structure.

Parameters
  • path (str): Path to the sfcf files.
  • prefix (str): Prefix of the sfcf files.
  • name_list (list[str]): Names of the correlation functions to read.
  • quarks_list (list[str]): Label of the quarks used in the sfcf input file, e.g. "quark quark". For version 0.0 this does NOT need to be given with the typical " - " that is present in the output file; it is added automatically for this version.
  • corr_type_list (list[str]): Type of correlation function to read. Can be
    • 'bi' for boundary-inner
    • 'bb' for boundary-boundary
    • 'bib' for boundary-inner-boundary
  • noffset_list (list[int]): Offset of the source (only relevant when wavefunctions are used)
  • wf_list (list[int]): IDs of the wave functions
  • wf2_list (list[int]): ID of the second wavefunction (only relevant for boundary-to-boundary correlation functions)
  • im (bool): if True, read imaginary instead of real part of the correlation function.
  • names (list): Alternative labeling for replicas/ensembles. Has to have the appropriate length
  • ens_name (str): replaces the name of the ensemble
  • version (str): Version of SFCF with which the measurement was done. If the compact output option (-c) was specified, append a "c" to the version (e.g. "1.0c"); if the append output option (-a) was specified, append an "a" to the version.
  • cfg_separator (str): String that separates the ensemble identifier from the configuration number (default 'n').
  • replica (list): list of replica to be read, default is all
  • files (list[list[str]]): list of files to be read per replica, default is all. For non-compact output format, hand the folders to be read here.
  • check_configs (list[list[int]]): list of lists of expected configs, e.g. [range(1, 1001)] for one replicum with 1000 configs
Returns
  • result (dict[list[Obs]]): dict with one of the following properties:
    • if keyed_out: dict[key] = list[Obs], where key has the form name/quarks/offset/wf/wf2
    • if not keyed_out: dict[name][quarks][offset][wf][wf2] = list[Obs]
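
A sketch of reading several correlators in one pass; path, prefix, correlator names and quark labels are again placeholders. It illustrates the two layouts of the returned dict:

    from pyerrors.input import sfcf

    # Placeholder arguments: f_A (boundary-inner) and f_1 (boundary-boundary)
    # are read together from a hypothetical compact-format measurement.
    res = sfcf.read_sfcf_multi("./data", "data_sf", ["f_A", "f_1"],
                               quarks_list=["lquark lquark"],
                               corr_type_list=["bi", "bb"],
                               version="2.0c", silent=True)

    # Default nested layout: result[name][quarks][offset][wf][wf2]
    f_A = res["f_A"]["lquark lquark"]["0"]["0"]["0"]  # list[Obs], one per timeslice
    f_1 = res["f_1"]["lquark lquark"]["0"]["0"]["0"]  # bb-type: length 1

    # With keyed_out=True the same data is returned under flat keys of the
    # form name/quarks/offset/wf/wf2, e.g. "f_A/lquark lquark/0/0/0".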