From 7fc8e91a3ecc2492052e0d9d928f353143268030 Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann
Date: Thu, 17 Apr 2025 08:36:31 +0000
Subject: [PATCH 1/4] implement t0 and t1 import from openQCD, allow no param file for these

---
 corrlib/input/openQCD.py | 109 +++++++++++++++++++++++++++++++++++----
 corrlib/input/sfcf.py    |   4 +-
 corrlib/meas_io.py       |  38 ++++++++++----
 corrlib/toml.py          |  25 +++++++--
 4 files changed, 148 insertions(+), 28 deletions(-)

diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py
index d17e943..0342a00 100644
--- a/corrlib/input/openQCD.py
+++ b/corrlib/input/openQCD.py
@@ -5,13 +5,14 @@
 import fnmatch
 from typing import Any
 
-def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]:
-    file = path + "/projects/" + project + '/' + file_in_project
-    dl.get(file, dataset=path + "/projects/" + project)
+def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, Any]:
+    file = os.path.join(path, "projects", project, file_in_project)
+    ds = os.path.join(path, "projects", project)
+    dl.get(file, dataset=ds)
     with open(file, 'r') as fp:
         lines = fp.readlines()
     fp.close()
-    param = {}
+    param: dict[str, Any] = {}
     param['rw_fcts'] = []
     param['rand'] = {}
 
@@ -48,17 +49,34 @@ def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]
                 param["rw_fcts"][nrw]["irp"] = "None"
     return param
 
-
-def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: list[str]=None, files: list[str]=None):
-    directory = path + "/projects/" + project + '/' + dir_in_project
+
+def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, Any]:
+    file = os.path.join(path, "projects", project, file_in_project)
+    ds = os.path.join(path, "projects", project)
+    dl.get(file, dataset=ds)
+    with open(file, 'r') as fp:
+        lines = fp.readlines()
+    fp.close()
+    param = {}
+    for line in lines:
+        line = line.strip()
+        for rwp in ["integrator", "eps", "ntot", "dnms"]:
+            if line.startswith(rwp):
+                param[rwp] = line.split()[1]
+    return param
+
+
+def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
+    dataset = os.path.join(path, "projects", project)
+    directory = os.path.join(dataset, dir_in_project)
     if files is None:
         files = []
         for root, ds, fs in os.walk(directory):
             for f in fs:
                 if fnmatch.fnmatch(f, prefix + "*" + postfix + ".dat"):
                     files.append(f)
 
-    dl.get([directory + "/" + f for f in files], dataset=path + "/projects/" + project)
+    dl.get([os.path.join(directory, f) for f in files], dataset=dataset)
     kwargs = {}
     if names is not None:
         kwargs['names'] = names
@@ -73,4 +91,77 @@ def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any
             par_list.append(str(param["rw_fcts"][i][k]))
         pars = "/".join(par_list)
         rw_dict[param["type"]][pars] = rwms[i]
-    return rw_dict
\ No newline at end of file
+    return rw_dict
+
+
+def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str=None, names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
+    dataset = os.path.join(path, "projects", project)
+    directory = os.path.join(dataset, dir_in_project)
+    if files is None:
+        files = []
+        for root, ds, fs in os.walk(directory):
+            for f in fs:
+                if fnmatch.fnmatch(f, prefix + "*" + postfix + ".dat"):
+                    files.append(f)
+    dl.get([os.path.join(directory, f) for f in files], dataset=dataset)
+    kwargs: dict[str, Any] = {}
+    if names is not None:
+        kwargs['names'] = names
+    if files is not None:
+        kwargs['files'] = files
+    if postfix is not None:
+        kwargs['postfix'] = postfix
+    kwargs['plot_fit'] = False
+
+    t0 = input.extract_t0(directory,
+                          prefix,
+                          dtr_read,
+                          xmin,
+                          spatial_extent,
+                          fit_range=fit_range,
+                          c=0.3,
+                          **kwargs
+                          )
+    par_list = []
+    for k in ["integrator", "eps", "ntot", "dnms"]:
+        par_list.append(str(param[k]))
+    pars = "/".join(par_list)
+    t0_dict: dict[str, Any] = {}
+    t0_dict[param["type"]] = {}
+    t0_dict[param["type"]][pars] = t0
+    return t0_dict
+
+
+def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = None, names: list[str]=None, files: list[str]=None) -> dict[str, Any]:
+    directory = os.path.join(path, "projects", project, dir_in_project)
+    if files is None:
+        files = []
+        for root, ds, fs in os.walk(directory):
+            for f in fs:
+                if fnmatch.fnmatch(f, prefix + "*" + postfix + ".dat"):
+                    files.append(f)
+    kwargs: dict[str, Any] = {}
+    if names is not None:
+        kwargs['names'] = names
+    if files is not None:
+        kwargs['files'] = files
+    if postfix is not None:
+        kwargs['postfix'] = postfix
+    kwargs['plot_fit'] = False
+    t0 = input.extract_t0(directory,
+                          prefix,
+                          dtr_read,
+                          xmin,
+                          spatial_extent,
+                          fit_range=fit_range,
+                          c=2./3,
+                          **kwargs
+                          )
+    par_list = []
+    for k in ["integrator", "eps", "ntot", "dnms"]:
+        par_list.append(str(param[k]))
+    pars = "/".join(par_list)
+    t0_dict: dict[str, Any] = {}
+    t0_dict[param["type"]] = {}
+    t0_dict[param["type"]][pars] = t0
+    return t0_dict
diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py
index 403d17a..bd4ae4d 100644
--- a/corrlib/input/sfcf.py
+++ b/corrlib/input/sfcf.py
@@ -232,7 +232,6 @@ def get_specs(key, parameters, sep='/') -> str:
         param = _map_params(parameters, key_parts[1:-1])
     else:
         param = _map_params(parameters, key_parts[1:])
-    print(param)
     s = json.dumps(param)
     return s
 
@@ -305,8 +304,7 @@ def read_data(path, project, dir_in_project, prefix, param, version='1.0c', cfg_
                                              range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True)
     for key in data_crr.keys():
         data[key] = data_crr[key]
-    # print("Read data:", data_crr)
-    # print(f"Read data: pe.input.sfcf.read_sfcf_multi({directory}, {prefix}, {param['crr']}, {param['mrr']}, {corr_type_list}, {range(len(param['wf_offsets']))}, {range(len(param['wf_basis']))}, {range(len(param['wf_basis']))}, {version}, {cfg_seperator}, keyed_out=True, names={names})")
+
     if not param['crs'] == []:
         data_crs = pe.input.sfcf.read_sfcf_multi(directory, param['crs'])
         for key in data_crs.keys():
diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py
index 7c9a32e..dc8fadc 100644
--- a/corrlib/meas_io.py
+++ b/corrlib/meas_io.py
@@ -9,7 +9,7 @@ from pyerrors import Obs, Corr
 from hashlib import sha256
 
 
-def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
+def write_measurement(path, ensemble, measurement, uuid, code, parameter_file=None):
     """
     Write a measurement to the backlog.
     If the file for the measurement already exists, update the measurement.
@@ -49,17 +49,33 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
                 pars[subkey] = sfcf.get_specs(corr + "/" + subkey, parameters)
     elif code == "openQCD":
-        parameters = openQCD.read_param(path, uuid, parameter_file)
-        pars = {}
-        subkeys = []
-        for i in range(len(parameters["rw_fcts"])):
-            par_list = []
-            for k in parameters["rw_fcts"][i].keys():
-                par_list.append(str(parameters["rw_fcts"][i][k]))
+        ms_type = list(measurement.keys())[0]
+        if ms_type == 'ms1':
+            parameters = openQCD.read_ms1_param(path, uuid, parameter_file)
+            pars = {}
+            subkeys = []
+            for i in range(len(parameters["rw_fcts"])):
+                par_list = []
+                for k in parameters["rw_fcts"][i].keys():
+                    par_list.append(str(parameters["rw_fcts"][i][k]))
+                subkey = "/".join(par_list)
+                subkeys.append(subkey)
+                pars[subkey] = json.dumps(parameters["rw_fcts"][i])
+        elif ms_type in ['t0', 't1']:
+            if parameter_file is not None:
+                parameters = openQCD.read_ms3_param(path, uuid, parameter_file)
+            else:
+                parameters = {}
+                for rwp in ["integrator", "eps", "ntot", "dnms"]:
+                    parameters[rwp] = "Unknown"
+            pars = {}
+            subkeys = []
+            par_list = []
+            for k in ["integrator", "eps", "ntot", "dnms"]:
+                par_list.append(str(parameters[k]))
             subkey = "/".join(par_list)
-            subkeys.append(subkey)
-            pars[subkey] = json.dumps(parameters["rw_fcts"][i])
-
+            subkeys = [subkey]
+            pars[subkey] = json.dumps(parameters)
 
     for subkey in subkeys:
         parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
         meas_path = file_in_archive + "::" + parHash
diff --git a/corrlib/toml.py b/corrlib/toml.py
index 9735bad..0004a4e 100644
--- a/corrlib/toml.py
+++ b/corrlib/toml.py
@@ -36,7 +36,7 @@ def check_measurement_data(measurements: dict, code: str) -> None:
     if code == "sfcf":
         var_names = ["path", "ensemble", "param_file", "version", "prefix", "cfg_seperator", "names"]
     elif code == "openQCD":
-        var_names = ["path", "ensemble", "measurement", "prefix", "param_file"]
+        var_names = ["path", "ensemble", "measurement", "prefix"]  # , "param_file"
     for mname, md in measurements.items():
         for var_name in var_names:
             if var_name not in md.keys():
@@ -92,11 +92,26 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None:
                                      version=md['version'], cfg_seperator=md['cfg_seperator'], sep='/')
             print(mname + " imported.")
         elif project['code'] == 'openQCD':
-            param = openQCD.read_param(path, uuid, md['param_file'])
-            param['type'] = md['measurement']
-            measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files'])
+            if md['measurement'] == 'ms1':
+                param = openQCD.read_ms1_param(path, uuid, md['param_file'])
+                param['type'] = 'ms1'
+                measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files'])
+            elif md['measurement'] == 't0':
+                if 'param_file' in md:
+                    param = openQCD.read_ms3_param(path, uuid, md['param_file'])
+                else:
+                    param = {}
+                    for rwp in ["integrator", "eps", "ntot", "dnms"]:
+                        param[rwp] = "Unknown"
+                param['type'] = 't0'
+                measurement = openQCD.extract_t0(path, uuid, md['path'], param, md["prefix"], md["dtr_read"], md["xmin"], md["spatial_extent"], fit_range=md.get('fit_range', 5), postfix=md.get('postfix', None), names=md.get('names', None))
+            elif md['measurement'] == 't1':
+                if 'param_file' in md:
+                    param = openQCD.read_ms3_param(path, uuid, md['param_file'])
+                param['type'] = 't1'
+                measurement = openQCD.extract_t1(path, uuid, md['path'], param, md["prefix"], md["dtr_read"], md["xmin"], md["spatial_extent"], fit_range=md.get('fit_range', 5), postfix=md.get('postfix', None), names=md.get('names', None))
 
-        write_measurement(path, ensemble, measurement, uuid, project['code'], md['param_file'])
+        write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None))
 
         if not os.path.exists(os.path.join(path, "toml_imports", uuid)):
             os.makedirs(os.path.join(path, "toml_imports", uuid))

From 65be8023257c52ccf669b5bfb0f95bdece6457f3 Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann
Date: Thu, 17 Apr 2025 16:25:15 +0000
Subject: [PATCH 2/4] type fix

---
 corrlib/toml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/corrlib/toml.py b/corrlib/toml.py
index 0004a4e..349d304 100644
--- a/corrlib/toml.py
+++ b/corrlib/toml.py
@@ -45,7 +45,7 @@ def check_measurement_data(measurements: dict, code: str) -> None:
     return
 
 
-def import_tomls(path: str, files: str, copy_files: bool=True) -> None:
+def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None:
     for file in files:
         import_toml(path, file, copy_files)
 

From 4a179bfa12b42b287312e50c177f2fa1790264ef Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann
Date: Thu, 17 Apr 2025 16:27:08 +0000
Subject: [PATCH 3/4] bug fix, enable add alias from cli, reimporter implemented in cli

---
 corrlib/cli.py | 52 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/corrlib/cli.py b/corrlib/cli.py
index ff4f374..e703857 100644
--- a/corrlib/cli.py
+++ b/corrlib/cli.py
@@ -2,9 +2,10 @@ from typing import Optional
 import typer
 from corrlib import __app_name__, __version__
 from .initialization import create
-from .toml import import_tomls, update_project
-from .find import find_record, find_project, list_projects
+from .toml import import_tomls, update_project, reimport_project
+from .find import find_record, list_projects
 from .tools import str2list
+from .main import update_aliases
 import os
 
 app = typer.Typer()
@@ -51,10 +52,32 @@ def list(
     elif entities == 'projects':
         results = list_projects(path)
         print("Projects:")
-        header = "UUID".ljust(37) + "| Names"
+        header = "UUID".ljust(37) + "| Aliases"
         print(header)
         for project in results:
-            print(project[0], "|", " | ".join(str2list(project[1])))
+            if project[1] is not None:
+                aliases = " | ".join(str2list(project[1]))
+            else:
+                aliases = "---"
+            print(project[0], "|", aliases)
+    return
+
+
+@app.command()
+def alias_add(
+    path: str = typer.Option(
+        str('./corrlib'),
+        "--dataset",
+        "-d",
+    ),
+    uuid: str = typer.Argument(),
+    alias: str = typer.Argument(),
+) -> None:
+    """
+    Add an alias to a project UUID.
+    """
+    alias_list = alias.split(",")
+    update_aliases(path, uuid, alias_list)
     return
 
 
@@ -98,6 +121,27 @@ def importer(
     return
 
 
+@app.command()
+def reimporter(
+    path: str = typer.Option(
+        str('./corrlib'),
+        "--dataset",
+        "-d",
+    ),
+    ident: str = typer.Argument()
+    ) -> None:
+    uuid = ident.split("::")[0]
+    if len(ident.split("::")) > 1:
+        toml_file = os.path.join(path, "toml_imports", ident.split("::")[1])
+        if os.path.exists(toml_file):
+            import_tomls(path, [toml_file], copy_files=False)
+        else:
+            raise Exception("This file is not known for this project.")
+    else:
+        reimport_project(path, uuid)
+    return
+
+
 @app.command()
 def init(
     path: str = typer.Option(

From df66f82bd37d81a0aba172e85b58543c65c5335f Mon Sep 17 00:00:00 2001
From: Justus Kuhlmann
Date: Thu, 17 Apr 2025 16:32:17 +0000
Subject: [PATCH 4/4] bugfix: update aliases

---
 corrlib/main.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/corrlib/main.py b/corrlib/main.py
index bac9182..defe03a 100644
--- a/corrlib/main.py
+++ b/corrlib/main.py
@@ -52,24 +52,29 @@ def update_project_data(db, uuid, prop, value = None):
     return
 
 
-def update_aliases(path, uuid, aliases):
-    known_data = _project_lookup_by_id(path + "/backlogger.db", uuid)[0]
+def update_aliases(path: str, uuid: str, aliases: list[str]):
+    db = os.path.join(path, "backlogger.db")
+    known_data = _project_lookup_by_id(db, uuid)[0]
     known_aliases = known_data[1]
+    if aliases is None:
+        aliases = []
     if known_aliases is None:
-        print(f"Project {uuid} is already imported, no known aliases.")
+        print(f"Project {uuid} has no known aliases.")
         known_alias_list = []
     else:
-        print(f"Project {uuid} is already imported, known by names: {known_aliases}")
+        print(f"Project {uuid} is known by names: {known_aliases}")
         known_alias_list = str2list(known_aliases)
-    new_alias_list = known_alias_list
+    new_alias_list = known_alias_list.copy()
     for aka in aliases:
         if aka not in known_alias_list:
            new_alias_list.append(aka)
+
     if not len(new_alias_list) == len(known_alias_list):
         alias_str = list2str(new_alias_list)
-        update_project_data(path, uuid, "aliases", alias_str)
+        dl.unlock(db, dataset=path)
+        update_project_data(db, uuid, "aliases", alias_str)
+        dl.save(db, dataset=path)
     return
 
 
@@ -121,7 +126,11 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio
         dl.drop(tmp_path, reckless='kill')
         shutil.rmtree(tmp_path)
     if aliases is not None:
-        update_aliases(path, uuid, aliases)
+        if isinstance(aliases, str):
+            alias_list = [aliases]
+        else:
+            alias_list = aliases
+        update_aliases(path, uuid, alias_list)
     # make this more concrete
     return uuid
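
Usage sketch (not part of the patches above): a minimal example of calling the new t0 reader from PATCH 1/4 directly, based only on the signatures added there. The dataset path, project UUID, directory, prefix and numeric values below are placeholders, and the "Unknown" fallback mirrors the parameter handling used when no ms3 parameter file is available.

    from corrlib.input import openQCD

    # Flow-time run parameters that read_ms3_param() would normally parse from
    # the openQCD ms3 parameter file; "Unknown" is the fallback the import code
    # uses when no parameter file is given.
    param = {key: "Unknown" for key in ["integrator", "eps", "ntot", "dnms"]}
    param["type"] = "t0"

    # extract_t0() walks <path>/projects/<uuid>/<dir_in_project> for files
    # matching <prefix>*<postfix>.dat, fetches them via datalad and returns a
    # dict of the form {"t0": {"<integrator>/<eps>/<ntot>/<dnms>": t0}}.
    t0_dict = openQCD.extract_t0(
        "./corrlib",                              # backlog dataset (placeholder)
        "00000000-0000-0000-0000-000000000000",   # project UUID (placeholder)
        "ms3",                                    # directory inside the project (placeholder)
        param,
        "openqcd_run",                            # measurement file prefix (placeholder)
        dtr_read=8,                               # placeholder measurement spacing
        xmin=4,                                   # placeholder fit window start
        spatial_extent=24,                        # placeholder spatial lattice extent
        postfix="ms3",                            # keeps the fnmatch pattern well-defined
    )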