pyerrors.input.utils

Utilities for the input

  1"""Utilities for the input"""
  2
  3import re
  4import fnmatch
  5import os
  6
  7
  8def sort_names(ll):
  9    """Sorts a list of names of replika with searches for `r` and `id` in the replikum string.
 10    If this search fails, a fallback method is used,
 11    where the strings are simply compared and the first diffeing numeral is used for differentiation.
 12
 13    Parameters
 14    ----------
 15    ll: list
 16        list to sort
 17
 18    Returns
 19    -------
 20    ll: list
 21        sorted list
 22    """
 23
 24    if len(ll) > 1:
 25        sorted = False
 26        r_pattern = r'r(\d+)'
 27        id_pattern = r'id(\d+)'
 28
 29        # sort list by id first
 30        if all([re.search(id_pattern, entry) for entry in ll]):
 31            ll.sort(key=lambda x: int(re.findall(id_pattern, x)[0]))
 32            sorted = True
 33        # then by replikum
 34        if all([re.search(r_pattern, entry) for entry in ll]):
 35            ll.sort(key=lambda x: int(re.findall(r_pattern, x)[0]))
 36            sorted = True
 37        # as the rearrangements by one key let the other key untouched, the list is sorted now
 38
 39        if not sorted:
 40            # fallback
 41            sames = ''
 42            for i in range(len(ll[0])):
 43                checking = ll[0][i]
 44                for rn in ll[1:]:
 45                    is_same = (rn[i] == checking)
 46                if is_same:
 47                    sames += checking
 48                else:
 49                    break
 50            print("Using prefix:", sames)
 51            ll.sort(key=lambda x: int(re.findall(r'\d+', x[len(sames):])[0]))
 52    return ll
 53
 54
 55def check_idl(idl, che):
 56    """Checks if list of configurations is contained in an idl
 57
 58    Parameters
 59    ----------
 60    idl : range or list
 61        idl of the current replicum
 62    che : list
 63        list of configurations to be checked against
 64
 65    Returns
 66    -------
 67    miss_str : str
 68        string with integers of which idls are missing
 69    """
 70
 71    missing = []
 72    for c in che:
 73        if c not in idl:
 74            missing.append(c)
 75    # print missing configurations such that it can directly be parsed to slurm terminal
 76    if not (len(missing) == 0):
 77        print(len(missing), "configs missing")
 78        miss_str = str(missing[0])
 79        for i in missing[1:]:
 80            miss_str += "," + str(i)
 81        print(miss_str)
 82    return miss_str
 83
 84
 85def check_params(path, param_hash, prefix, param_prefix="parameters_"):
 86    """
 87    Check if, for sfcf, the parameter hashes at the end of the parameter files are in fact the expected one.
 88
 89    Parameters
 90    ----------
 91    path: str
 92        measurement path, same as for sfcf read method
 93    param_hash: str
 94        expected parameter hash
 95    prefix: str
 96        data prefix to find the appropriate replicum folders in path
 97    param_prefix: str
 98        prefix of the parameter file. Defaults to 'parameters_'
 99
100    Returns
101    -------
102    nums: dict
103        dictionary of faulty parameter files sorted by the replica paths
104    """
105
106    ls = []
107    for (dirpath, dirnames, filenames) in os.walk(path):
108        ls.extend(dirnames)
109        break
110    if not ls:
111        raise Exception('Error, directory not found')
112    # Exclude folders with different names
113    for exc in ls:
114        if not fnmatch.fnmatch(exc, prefix + '*'):
115            ls = list(set(ls) - set([exc]))
116
117    ls = sort_names(ls)
118    nums = {}
119    for rep in ls:
120        rep_path = path + '/' + rep
121        # files of replicum
122        sub_ls = []
123        for (dirpath, dirnames, filenames) in os.walk(rep_path):
124            sub_ls.extend(filenames)
125
126        # filter
127        param_files = []
128        for file in sub_ls:
129            if fnmatch.fnmatch(file, param_prefix + '*'):
130                param_files.append(file)
131
132        rep_nums = ''
133        for file in param_files:
134            with open(rep_path + '/' + file) as fp:
135                for line in fp:
136                    pass
137                last_line = line
138                if last_line.split()[2] != param_hash:
139                    rep_nums += file.split("_")[1] + ','
140        nums[rep_path] = rep_nums
141
142        if not len(rep_nums) == 0:
143            raise Warning("found differing parameter hash in the param files in " + rep_path)
144    return nums
def sort_names(ll):
 9def sort_names(ll):
10    """Sorts a list of names of replika with searches for `r` and `id` in the replikum string.
11    If this search fails, a fallback method is used,
12    where the strings are simply compared and the first diffeing numeral is used for differentiation.
13
14    Parameters
15    ----------
16    ll: list
17        list to sort
18
19    Returns
20    -------
21    ll: list
22        sorted list
23    """
24
25    if len(ll) > 1:
26        sorted = False
27        r_pattern = r'r(\d+)'
28        id_pattern = r'id(\d+)'
29
30        # sort list by id first
31        if all([re.search(id_pattern, entry) for entry in ll]):
32            ll.sort(key=lambda x: int(re.findall(id_pattern, x)[0]))
33            sorted = True
34        # then by replikum
35        if all([re.search(r_pattern, entry) for entry in ll]):
36            ll.sort(key=lambda x: int(re.findall(r_pattern, x)[0]))
37            sorted = True
38        # as the rearrangements by one key let the other key untouched, the list is sorted now
39
40        if not sorted:
41            # fallback
42            sames = ''
43            for i in range(len(ll[0])):
44                checking = ll[0][i]
45                for rn in ll[1:]:
46                    is_same = (rn[i] == checking)
47                if is_same:
48                    sames += checking
49                else:
50                    break
51            print("Using prefix:", sames)
52            ll.sort(key=lambda x: int(re.findall(r'\d+', x[len(sames):])[0]))
53    return ll

Sorts a list of replica names by searching for r and id in each replica string. If this search fails, a fallback method is used, where the strings are simply compared and the first differing numeral is used for differentiation.

Parameters
  • ll (list): list to sort
Returns
  • ll (list): sorted list
def check_idl(idl, che):
def check_idl(idl, che):
    """Checks if list of configurations is contained in an idl

    Parameters
    ----------
    idl : range or list
        idl of the current replicum
    che : list
        list of configurations to be checked against

    Returns
    -------
    miss_str : str
        comma-separated string of the entries of `che` that are not in `idl`;
        empty string if nothing is missing
    """

    missing = [c for c in che if c not in idl]
    if not missing:
        # nothing to report; return an empty string instead of failing on an
        # unassigned variable
        return ''

    # print missing configurations such that it can directly be parsed to slurm terminal
    print(len(missing), "configs missing")
    miss_str = ",".join(str(c) for c in missing)
    print(miss_str)
    return miss_str

Checks if list of configurations is contained in an idl

Parameters
  • idl (range or list): idl of the current replicum
  • che (list): list of configurations to be checked against
Returns
  • miss_str (str): string with integers of which idls are missing
def check_params(path, param_hash, prefix, param_prefix='parameters_'):
 86def check_params(path, param_hash, prefix, param_prefix="parameters_"):
 87    """
 88    Check if, for sfcf, the parameter hashes at the end of the parameter files are in fact the expected one.
 89
 90    Parameters
 91    ----------
 92    path: str
 93        measurement path, same as for sfcf read method
 94    param_hash: str
 95        expected parameter hash
 96    prefix: str
 97        data prefix to find the appropriate replicum folders in path
 98    param_prefix: str
 99        prefix of the parameter file. Defaults to 'parameters_'
100
101    Returns
102    -------
103    nums: dict
104        dictionary of faulty parameter files sorted by the replica paths
105    """
106
107    ls = []
108    for (dirpath, dirnames, filenames) in os.walk(path):
109        ls.extend(dirnames)
110        break
111    if not ls:
112        raise Exception('Error, directory not found')
113    # Exclude folders with different names
114    for exc in ls:
115        if not fnmatch.fnmatch(exc, prefix + '*'):
116            ls = list(set(ls) - set([exc]))
117
118    ls = sort_names(ls)
119    nums = {}
120    for rep in ls:
121        rep_path = path + '/' + rep
122        # files of replicum
123        sub_ls = []
124        for (dirpath, dirnames, filenames) in os.walk(rep_path):
125            sub_ls.extend(filenames)
126
127        # filter
128        param_files = []
129        for file in sub_ls:
130            if fnmatch.fnmatch(file, param_prefix + '*'):
131                param_files.append(file)
132
133        rep_nums = ''
134        for file in param_files:
135            with open(rep_path + '/' + file) as fp:
136                for line in fp:
137                    pass
138                last_line = line
139                if last_line.split()[2] != param_hash:
140                    rep_nums += file.split("_")[1] + ','
141        nums[rep_path] = rep_nums
142
143        if not len(rep_nums) == 0:
144            raise Warning("found differing parameter hash in the param files in " + rep_path)
145    return nums

Check if, for sfcf, the parameter hashes at the end of the parameter files are in fact the expected ones.

Parameters
  • path (str): measurement path, same as for sfcf read method
  • param_hash (str): expected parameter hash
  • prefix (str): data prefix to find the appropriate replicum folders in path
  • param_prefix (str): prefix of the parameter file. Defaults to 'parameters_'
Returns
  • nums (dict): dictionary of faulty parameter files sorted by the replica paths