From c6c8cf67a516de84f84b123a0ab9f41c888bfcc0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 7 Apr 2025 10:36:59 +0000 Subject: [PATCH] smaller footprint for compact sfcf measurements --- corrlib/input/sfcf.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 9feeaa1..403d17a 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -226,7 +226,7 @@ def _map_params(params: dict, spec_list: list) -> dict[str, Any]: return new_specs -def get_specs(key, parameters, sep='/'): +def get_specs(key, parameters, sep='/') -> str: key_parts = key.split(sep) if corr_types[key_parts[0]] == 'bi': param = _map_params(parameters, key_parts[1:-1]) @@ -267,15 +267,34 @@ def read_data(path, project, dir_in_project, prefix, param, version='1.0c', cfg_ names = kwargs.get('names', None) directory = os.path.join(path, "projects", project, dir_in_project) + appended = (version[-1] == "a") + ls = [] + files_to_get = [] + for (dirpath, dirnames, filenames) in os.walk(directory): + if not appended: + ls.extend(dirnames) + else: + ls.extend(filenames) + break + if not appended: + compact = (version[-1] == "c") + for i, item in enumerate(ls): + rep_path = directory + '/' + item + sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) + files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) + + print("Getting data, this might take a while...") - dl.get(directory, dataset=path) + if len(files_to_get) != 0: + dl.get(files_to_get, dataset= os.path.join(path, "projects", project), jobs=4) + else: + dl.get(directory, dataset= os.path.join(path, "projects", project), jobs=4) print("... 
done downloading.") corr_type_list = [] for corr_name in param['crr']: if corr_name not in corr_types: raise ValueError('Correlator type not known.') corr_type_list.append(corr_types[corr_name]) - data = {} if not param['crr'] == []: if names is not None: @@ -286,8 +305,8 @@ def read_data(path, project, dir_in_project, prefix, param, version='1.0c', cfg_ range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True) for key in data_crr.keys(): data[key] = data_crr[key] - print("Read data:", data_crr) - print(f"Read data: pe.input.sfcf.read_sfcf_multi({directory}, {prefix}, {param['crr']}, {param['mrr']}, {corr_type_list}, {range(len(param['wf_offsets']))}, {range(len(param['wf_basis']))}, {range(len(param['wf_basis']))}, {version}, {cfg_seperator}, keyed_out=True, names={names})") + # print("Read data:", data_crr) + # print(f"Read data: pe.input.sfcf.read_sfcf_multi({directory}, {prefix}, {param['crr']}, {param['mrr']}, {corr_type_list}, {range(len(param['wf_offsets']))}, {range(len(param['wf_basis']))}, {range(len(param['wf_basis']))}, {version}, {cfg_seperator}, keyed_out=True, names={names})") if not param['crs'] == []: data_crs = pe.input.sfcf.read_sfcf_multi(directory, param['crs']) for key in data_crs.keys():