pyerrors.input.utils
Utilities for the input
"""Utilities for the input"""

import re
import fnmatch
import os


def sort_names(ll):
    """Sort a list of replica names in place and return it.

    If every name contains an ``id<number>`` tag, the list is ordered by that number.
    If every name contains an ``r<number>`` tag, the list is afterwards ordered by
    that number; as the sort is stable, the replicum number ends up as primary key
    and the id as secondary key.
    If neither tag is found in all names, a fallback is used: the prefix shared by
    all names is dropped and the first numeral after it is used as sort key.

    Parameters
    ----------
    ll: list
        list of name strings to sort

    Returns
    -------
    ll: list
        the same list object, sorted

    Raises
    ------
    IndexError
        in the fallback, if a name contains no numeral after the common prefix
    """

    if len(ll) > 1:
        is_sorted = False
        r_pattern = r'r(\d+)'
        id_pattern = r'id(\d+)'

        # sort list by id first
        if all(re.search(id_pattern, entry) for entry in ll):
            ll.sort(key=lambda x: int(re.search(id_pattern, x).group(1)))
            is_sorted = True
        # then by replicum; list.sort is stable, so the id order survives within equal r
        if all(re.search(r_pattern, entry) for entry in ll):
            ll.sort(key=lambda x: int(re.search(r_pattern, x).group(1)))
            is_sorted = True

        if not is_sorted:
            # fallback: prefix common to *all* names (not only the first and the last one)
            sames = os.path.commonprefix(ll)
            # do not cut a numeral in two: 'data_1' and 'data_10' share 'data_1',
            # but the distinguishing numbers are 1 and 10
            sames = re.sub(r'\d+\Z', '', sames)
            print("Using prefix:", sames)
            ll.sort(key=lambda x: int(re.findall(r'\d+', x[len(sames):])[0]))
    return ll


def check_idl(idl, che):
    """Check whether all configurations in `che` are contained in `idl`.

    The missing configurations are printed as a comma-separated list, so that the
    output can be pasted directly into a slurm command line.

    Parameters
    ----------
    idl : range or list
        idl of the current replicum
    che : list
        list of configurations to be checked against

    Returns
    -------
    miss_str : str
        comma-separated configurations of `che` that are not in `idl`;
        empty string if nothing is missing
    """

    missing = [c for c in che if c not in idl]
    # always defined, so that the "nothing missing" case returns '' instead of failing
    miss_str = ",".join(str(c) for c in missing)
    if missing:
        print(len(missing), "configs missing")
        print(miss_str)
    return miss_str
def check_params(path, param_hash, prefix, param_prefix="parameters_"):
    """
    Check, for sfcf, that the parameter hash at the end of each parameter file is the expected one.

    The immediate subdirectories of `path` whose names start with `prefix` are
    treated as replica. In each of them (including nested folders) all files whose
    names start with `param_prefix` are read, and the third whitespace-separated
    token of the last line is compared with `param_hash`. A file whose last line
    has no such token (e.g. an empty file) is counted as faulty.

    Parameters
    ----------
    path: str
        measurement path, same as for sfcf read method
    param_hash: str
        expected parameter hash
    prefix: str
        data prefix to find the appropriate replicum folders in path
    param_prefix: str
        prefix of the parameter file. Defaults to 'parameters_'

    Returns
    -------
    nums: dict
        maps each replicum path to a string of the faulty parameter file numbers,
        each followed by a comma

    Raises
    ------
    Exception
        if `path` has no subdirectories
    Warning
        raised as an exception for the first replicum that contains a parameter
        file with a differing hash
    """

    ls = []
    for (_, dirnames, _) in os.walk(path):
        ls.extend(dirnames)
        break  # only the immediate subdirectories are replicum candidates
    if not ls:
        raise Exception('Error, directory not found')
    # Exclude folders with different names (keeps the listing order deterministic)
    ls = [name for name in ls if fnmatch.fnmatch(name, prefix + '*')]

    ls = sort_names(ls)
    nums = {}
    for rep in ls:
        rep_path = path + '/' + rep
        # parameter files of the replicum; remember the folder each one was found in,
        # since os.walk also descends into subfolders
        param_files = []
        for (dirpath, _, filenames) in os.walk(rep_path):
            for file in filenames:
                if fnmatch.fnmatch(file, param_prefix + '*'):
                    param_files.append((dirpath, file))

        faulty = []
        for dirpath, file in param_files:
            # reset per file, so an empty file cannot inherit the previous file's last line
            last_line = ''
            with open(os.path.join(dirpath, file)) as fp:
                for line in fp:
                    last_line = line
            fields = last_line.split()
            # a missing hash cannot be confirmed, hence it counts as differing
            if len(fields) < 3 or fields[2] != param_hash:
                faulty.append(file.split("_")[1] + ',')
        rep_nums = ''.join(faulty)
        nums[rep_path] = rep_nums

        if rep_nums:
            raise Warning("found differing parameter hash in the param files in " + rep_path)
    return nums
def
sort_names(ll):
def sort_names(ll):
    """Sort a list of replica names in place and return it.

    If every name contains an ``id<number>`` tag, the list is ordered by that number.
    If every name contains an ``r<number>`` tag, the list is afterwards ordered by
    that number; as the sort is stable, the replicum number ends up as primary key
    and the id as secondary key.
    If neither tag is found in all names, a fallback is used: the prefix shared by
    all names is dropped and the first numeral after it is used as sort key.

    Parameters
    ----------
    ll: list
        list of name strings to sort

    Returns
    -------
    ll: list
        the same list object, sorted

    Raises
    ------
    IndexError
        in the fallback, if a name contains no numeral after the common prefix
    """

    if len(ll) > 1:
        is_sorted = False
        r_pattern = r'r(\d+)'
        id_pattern = r'id(\d+)'

        # sort list by id first
        if all(re.search(id_pattern, entry) for entry in ll):
            ll.sort(key=lambda x: int(re.search(id_pattern, x).group(1)))
            is_sorted = True
        # then by replicum; list.sort is stable, so the id order survives within equal r
        if all(re.search(r_pattern, entry) for entry in ll):
            ll.sort(key=lambda x: int(re.search(r_pattern, x).group(1)))
            is_sorted = True

        if not is_sorted:
            # fallback: prefix common to *all* names (not only the first and the last one)
            sames = os.path.commonprefix(ll)
            # do not cut a numeral in two: 'data_1' and 'data_10' share 'data_1',
            # but the distinguishing numbers are 1 and 10
            sames = re.sub(r'\d+\Z', '', sames)
            print("Using prefix:", sames)
            ll.sort(key=lambda x: int(re.findall(r'\d+', x[len(sames):])[0]))
    return ll
Sorts a list of replica names by searching for r
and id
in the replicum string.
If this search fails, a fallback method is used,
where the strings are simply compared and the first differing numeral is used for differentiation.
Parameters
- ll (list): list to sort
Returns
- ll (list): sorted list
def
check_idl(idl, che):
def check_idl(idl, che):
    """Check whether all configurations in `che` are contained in `idl`.

    The missing configurations are printed as a comma-separated list, so that the
    output can be pasted directly into a slurm command line.

    Parameters
    ----------
    idl : range or list
        idl of the current replicum
    che : list
        list of configurations to be checked against

    Returns
    -------
    miss_str : str
        comma-separated configurations of `che` that are not in `idl`;
        empty string if nothing is missing
    """

    missing = [c for c in che if c not in idl]
    # always defined, so that the "nothing missing" case returns '' instead of failing
    miss_str = ",".join(str(c) for c in missing)
    if missing:
        print(len(missing), "configs missing")
        print(miss_str)
    return miss_str
Checks if list of configurations is contained in an idl
Parameters
- idl (range or list): idl of the current replicum
- che (list): list of configurations to be checked against
Returns
- miss_str (str): comma-separated string of the configurations that are missing from the idl
def
check_params(path, param_hash, prefix, param_prefix='parameters_'):
def check_params(path, param_hash, prefix, param_prefix="parameters_"):
    """
    Check, for sfcf, that the parameter hash at the end of each parameter file is the expected one.

    The immediate subdirectories of `path` whose names start with `prefix` are
    treated as replica. In each of them (including nested folders) all files whose
    names start with `param_prefix` are read, and the third whitespace-separated
    token of the last line is compared with `param_hash`. A file whose last line
    has no such token (e.g. an empty file) is counted as faulty.

    Parameters
    ----------
    path: str
        measurement path, same as for sfcf read method
    param_hash: str
        expected parameter hash
    prefix: str
        data prefix to find the appropriate replicum folders in path
    param_prefix: str
        prefix of the parameter file. Defaults to 'parameters_'

    Returns
    -------
    nums: dict
        maps each replicum path to a string of the faulty parameter file numbers,
        each followed by a comma

    Raises
    ------
    Exception
        if `path` has no subdirectories
    Warning
        raised as an exception for the first replicum that contains a parameter
        file with a differing hash
    """

    ls = []
    for (_, dirnames, _) in os.walk(path):
        ls.extend(dirnames)
        break  # only the immediate subdirectories are replicum candidates
    if not ls:
        raise Exception('Error, directory not found')
    # Exclude folders with different names (keeps the listing order deterministic)
    ls = [name for name in ls if fnmatch.fnmatch(name, prefix + '*')]

    ls = sort_names(ls)
    nums = {}
    for rep in ls:
        rep_path = path + '/' + rep
        # parameter files of the replicum; remember the folder each one was found in,
        # since os.walk also descends into subfolders
        param_files = []
        for (dirpath, _, filenames) in os.walk(rep_path):
            for file in filenames:
                if fnmatch.fnmatch(file, param_prefix + '*'):
                    param_files.append((dirpath, file))

        faulty = []
        for dirpath, file in param_files:
            # reset per file, so an empty file cannot inherit the previous file's last line
            last_line = ''
            with open(os.path.join(dirpath, file)) as fp:
                for line in fp:
                    last_line = line
            fields = last_line.split()
            # a missing hash cannot be confirmed, hence it counts as differing
            if len(fields) < 3 or fields[2] != param_hash:
                faulty.append(file.split("_")[1] + ',')
        rep_nums = ''.join(faulty)
        nums[rep_path] = rep_nums

        if rep_nums:
            raise Warning("found differing parameter hash in the param files in " + rep_path)
    return nums
Check, for sfcf, whether the parameter hashes at the end of the parameter files match the expected one.
Parameters
- path (str): measurement path, same as for sfcf read method
- param_hash (str): expected parameter hash
- prefix (str): data prefix to find the appropriate replicum folders in path
- param_prefix (str): prefix of the parameter file. Defaults to 'parameters_'
Returns
- nums (dict): dictionary of faulty parameter files sorted by the replica paths