From 029239c701359e84cadb8c093ebd0ccc26d4f9df Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 4 Dec 2025 16:04:14 +0100 Subject: [PATCH 1/4] add docstrings for input files --- corrlib/input/implementations.py | 2 +- corrlib/input/openQCD.py | 128 +++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/corrlib/input/implementations.py b/corrlib/input/implementations.py index 2a14d3e..9cb9e84 100644 --- a/corrlib/input/implementations.py +++ b/corrlib/input/implementations.py @@ -1,2 +1,2 @@ - +# List of supported input implementations codes = ['sfcf', 'openQCD'] diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index e0caba6..da92645 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -6,6 +6,24 @@ from typing import Any, Optional def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: + """ + Read the parameters for ms1 measurements from a parameter file in the project. + + Parameters + ---------- + path: str + The path to the backlogger folder. + project: str + The project from which to read the parameter file. + file_in_project: str + The path to the parameter file within the project. + + Returns + ------- + param: dict[str, Any] + The parameters read from the file. + """ + file = os.path.join(path, "projects", project, file_in_project) ds = os.path.join(path, "projects", project) dl.get(file, dataset=ds) @@ -52,6 +70,24 @@ def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, A def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: + """ + Read the parameters for ms3 measurements from a parameter file in the project. + + Parameters + ---------- + path: str + The path to the backlogger folder. + project: str + The project from which to read the parameter file. + file_in_project: str + The path to the parameter file within the project. 
+ + Returns + ------- + param: dict[str, Any] + The parameters read from the file. + """ + file = os.path.join(path, "projects", project, file_in_project) ds = os.path.join(path, "projects", project) dl.get(file, dataset=ds) @@ -68,6 +104,36 @@ def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, A def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: + """ + Read reweighting factor measurements from the project. + + Parameters + ---------- + path: str + The path to the backlogger folder. + project: str + The project from which to read the measurements. + dir_in_project: str + The directory within the project where the measurements are located. + param: dict[str, Any] + The parameters for the measurements. + prefix: str + The prefix of the measurement files. + postfix: str + The postfix of the measurement files. + version: str + The version of the openQCD used. + names: list[str] + Specific names for the replica of the ensemble the measurement file belongs to. + files: list[str] + Specific files to read. + + Returns + ------- + rw_dict: dict[str, dict[str, Any]] + The reweighting factor measurements read from the files. + """ + dataset = os.path.join(path, "projects", project) directory = os.path.join(dataset, dir_in_project) if files is None: @@ -95,6 +161,37 @@ def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: + """ + Extract t0 measurements from the project. + + Parameters + ---------- + path: str + The path to the backlogger folder. 
+ project: str + The project from which to read the measurements. + dir_in_project: str + The directory within the project where the measurements are located. + param: dict[str, Any] + The parameters for the measurements. + prefix: str + The prefix of the measurement files. + dtr_read: int + The dtr_read parameter for the extraction. + xmin: int + The xmin parameter for the extraction. + spatial_extent: int + The spatial_extent parameter for the extraction. + fit_range: int + The fit_range parameter for the extraction. + postfix: str + The postfix of the measurement files. + names: list[str] + Specific names for the replica of the ensemble the measurement file belongs to. + files: list[str] + Specific files to read. + """ + dataset = os.path.join(path, "projects", project) directory = os.path.join(dataset, dir_in_project) if files is None: @@ -133,6 +230,37 @@ def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, An def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: + """ + Extract t1 measurements from the project. + + Parameters + ---------- + path: str + The path to the backlogger folder. + project: str + The project from which to read the measurements. + dir_in_project: str + The directory within the project where the measurements are located. + param: dict[str, Any] + The parameters for the measurements. + prefix: str + The prefix of the measurement files. + dtr_read: int + The dtr_read parameter for the extraction. + xmin: int + The xmin parameter for the extraction. + spatial_extent: int + The spatial_extent parameter for the extraction. + fit_range: int + The fit_range parameter for the extraction. + postfix: str + The postfix of the measurement files. 
+ names: list[str] + Specific names for the replica of the ensemble the measurement file belongs to. + files: list[str] + Specific files to read. + """ + directory = os.path.join(path, "projects", project, dir_in_project) if files is None: files = [] From 3cd7896ae1808d9a5442a7db2808bf784fb3d1f2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 12 Feb 2026 16:20:11 +0100 Subject: [PATCH 2/4] comment get_specs --- corrlib/input/sfcf.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index ed5a45a..6a75b72 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -229,6 +229,24 @@ def _map_params(params: dict[str, Any], spec_list: list[str]) -> dict[str, Any]: def get_specs(key: str, parameters: dict[str, Any], sep: str = '/') -> str: + """ + Get specification from the parameter file for a specific key in the read measurements + + Parameters + ---------- + key: str + The key for which the parameters are to be looked up. + parameters: dict[str, Any] + The dictionary with the parameters from the parameter file. + sep: str + Separator string for the key. (default="/") + + Returns + ------- + s: str + json string holding the parameters. + """ + key_parts = key.split(sep) if corr_types[key_parts[0]] == 'bi': param = _map_params(parameters, key_parts[1:-1]) From 54006f46f5fb8069bcd84a87607bca4abf967c5b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 12 Feb 2026 16:24:40 +0100 Subject: [PATCH 3/4] comment retruns of extract t0 and t1 --- corrlib/input/openQCD.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index da92645..71ebec6 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -190,6 +190,11 @@ def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, An Specific names for the replica of the ensemble the measurement file belongs to. 
files: list[str] Specific files to read. + + Returns + ------- + t0_dict: dict + Dictionary of t0 values in the pycorrlib style, with the parameters at hand. """ dataset = os.path.join(path, "projects", project) @@ -259,6 +264,11 @@ def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, An Specific names for the replica of the ensemble the measurement file belongs to. files: list[str] Specific files to read. + + Returns + ------- + t1_dict: dict + Dictionary of t1 values in the pycorrlib style, with the parameters at hand. """ directory = os.path.join(path, "projects", project, dir_in_project) @@ -289,7 +299,7 @@ def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, An for k in ["integrator", "eps", "ntot", "dnms"]: par_list.append(str(param[k])) pars = "/".join(par_list) - t0_dict: dict[str, Any] = {} - t0_dict[param["type"]] = {} - t0_dict[param["type"]][pars] = t0 - return t0_dict + t1_dict: dict[str, Any] = {} + t1_dict[param["type"]] = {} + t1_dict[param["type"]][pars] = t0 + return t1_dict From 4631769e81bcb58e6d667aee8e81b875129933a6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 12 Feb 2026 18:39:04 +0100 Subject: [PATCH 4/4] finish a first pitch of docstrings --- corrlib/cli.py | 3 + corrlib/find.py | 256 ++++++++++++++++++++++++++++---------- corrlib/initialization.py | 35 +++++- corrlib/main.py | 52 +++++--- corrlib/meas_io.py | 72 ++++++++++- corrlib/toml.py | 76 ++++++++++- corrlib/tools.py | 96 +++++++++++++- 7 files changed, 502 insertions(+), 88 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 3f4eb8f..414fcc4 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -134,6 +134,9 @@ def reimporter( ), ident: str = typer.Argument() ) -> None: + """ + Reimport the toml file identfied by the ident string. 
+ """ uuid = ident.split("::")[0] if len(ident.split("::")) > 1: toml_file = os.path.join(path, "toml_imports", ident.split("::")[1]) diff --git a/corrlib/find.py b/corrlib/find.py index 901c09c..21063ec 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -8,11 +8,23 @@ from .tools import k2m, get_db_file from .tracker import get from typing import Any, Optional -# this will implement the search functionality - def _project_lookup_by_alias(db: str, alias: str) -> str: - # this will lookup the project name based on the alias + """ + Lookup a projects UUID by its (human-readable) alias. + + Parameters + ---------- + db: str + The database to look up the project. + alias: str + The alias to look up. + + Returns + ------- + uuid: str + The UUID of the project with the given alias. + """ conn = sqlite3.connect(db) c = conn.cursor() c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'") @@ -26,6 +38,21 @@ def _project_lookup_by_alias(db: str, alias: str) -> str: def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: + """ + Return the project information available in the database by UUID. + + Parameters + ---------- + db: str + The database to look up the project. + uuid: str + The uuid of the project in question. + + Returns + ------- + results: list + The row of the project in the database. + """ conn = sqlite3.connect(db) c = conn.cursor() c.execute(f"SELECT * FROM 'projects' WHERE id = '{uuid}'") @@ -36,6 +63,38 @@ def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: + """ + Look up a correlator record in the database by the data given to the method. + + Parameters + ---------- + db: str + The database to look up the record. 
+ ensemble: str + The ensemble the record is associated with. + correlator_name: str + The name of the correlator in question. + code: str + The name of the code which was used to calculate the correlator. + project: str, optional + The UUID of the project the correlator was calculated in. + parameters: str, optional + A dictionary holding the exact parameters for the measurement that are held in the database. + created_before: str, optional + Timestamp string before which the measurement has been created. + created_after: str, optional + Timestamp string after which the measurement has been created. + updated_before: str, optional + Timestamp string before which the measurement has been updated. + updated_after: str, optional + Timestamp string after which the measurement has been updated. + + Returns + ------- + results: pd.DataFrame + A pandas DataFrame holding the information received from the DB query. + """ + project_str = project search_expr = f"SELECT * FROM 'backlogs' WHERE name = '{correlator_name}' AND ensemble = '{ensemble}'" @@ -60,85 +119,112 @@ def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: + """ + Filter method for the Database entries holding SFCF calculations. + + Parameters + ---------- + results: pd.DataFrame + The unfiltered pandas DataFrame holding the entries from the database. + + offset: list[float], optional + quark_kappas: list[float] + quark_masses: list[float] + qk1: float, optional + Mass parameter $\kappa_1$ of the first quark. + qk2: float, optional + Mass parameter $\kappa_2$ of the second quark. + qm1: float, optional + Bare quark mass $m_1$ of the first quark. + qm2: float, optional + Bare quark mass $m_2$ of the second quark. + quark_thetas: list[list[float]], optional + wf1: optional + wf2: optional + + Returns + ------- + results: pd.DataFrame + The filtered DataFrame, only holding the records that fit to the parameters given. 
+ """ drops = [] for ind in range(len(results)): result = results.iloc[ind] - if result['code'] == 'sfcf': - param = json.loads(result['parameters']) - if 'offset' in kwargs: - if kwargs.get('offset') != param['offset']: + param = json.loads(result['parameters']) + if 'offset' in kwargs: + if kwargs.get('offset') != param['offset']: + drops.append(ind) + continue + if 'quark_kappas' in kwargs: + kappas = kwargs['quark_kappas'] + if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): + drops.append(ind) + continue + if 'quark_masses' in kwargs: + masses = kwargs['quark_masses'] + if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): + drops.append(ind) + continue + if 'qk1' in kwargs: + quark_kappa1 = kwargs['qk1'] + if not isinstance(quark_kappa1, list): + if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): drops.append(ind) continue - if 'quark_kappas' in kwargs: - kappas = kwargs['quark_kappas'] - if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_masses' in kwargs: - masses = kwargs['quark_masses'] - if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - if 'qk1' in kwargs: - quark_kappa1 = kwargs['qk1'] - if not isinstance(quark_kappa1, list): - if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): + else: + if len(quark_kappa1) == 2: + if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): drops.append(ind) continue - else: - if len(quark_kappa1) == 2: - if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): - drops.append(ind) - continue - if 'qk2' in kwargs: - quark_kappa2 = kwargs['qk2'] - if not 
isinstance(quark_kappa2, list): - if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): - drops.append(ind) - continue - else: - if len(quark_kappa2) == 2: - if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): - drops.append(ind) - continue - if 'qm1' in kwargs: - quark_mass1 = kwargs['qm1'] - if not isinstance(quark_mass1, list): - if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass1) == 2: - if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): - drops.append(ind) - continue - if 'qm2' in kwargs: - quark_mass2 = kwargs['qm2'] - if not isinstance(quark_mass2, list): - if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass2) == 2: - if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_thetas' in kwargs: - quark_thetas = kwargs['quark_thetas'] - if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): + if 'qk2' in kwargs: + quark_kappa2 = kwargs['qk2'] + if not isinstance(quark_kappa2, list): + if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): drops.append(ind) continue - # careful, this is not save, when multiple contributions are present! 
- if 'wf1' in kwargs: - wf1 = kwargs['wf1'] - if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): + else: + if len(quark_kappa2) == 2: + if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): + drops.append(ind) + continue + if 'qm1' in kwargs: + quark_mass1 = kwargs['qm1'] + if not isinstance(quark_mass1, list): + if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): drops.append(ind) continue - if 'wf2' in kwargs: - wf2 = kwargs['wf2'] - if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): + else: + if len(quark_mass1) == 2: + if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): + drops.append(ind) + continue + if 'qm2' in kwargs: + quark_mass2 = kwargs['qm2'] + if not isinstance(quark_mass2, list): + if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): drops.append(ind) continue + else: + if len(quark_mass2) == 2: + if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): + drops.append(ind) + continue + if 'quark_thetas' in kwargs: + quark_thetas = kwargs['quark_thetas'] + if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): + drops.append(ind) + continue + # careful, this is not save, when multiple contributions are present! 
+ if 'wf1' in kwargs: + wf1 = kwargs['wf1'] + if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): + drops.append(ind) + continue + if 'wf2' in kwargs: + wf2 = kwargs['wf2'] + if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): + drops.append(ind) + continue return results.drop(drops) @@ -152,17 +238,49 @@ def find_record(path: str, ensemble: str, correlator_name: str, code: str, proje results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after) if code == "sfcf": results = sfcf_filter(results, **kwargs) + elif code == "openQCD": + pass + else: + raise Exception print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() def find_project(path: str, name: str) -> str: + """ + Find a project by it's human readable name. + + Parameters + ---------- + path: str + The path of the library. + name: str + The name of the project to look for in the library. + + Returns + ------- + uuid: str + The uuid of the project in question. + """ db_file = get_db_file(path) get(path, db_file) return _project_lookup_by_alias(os.path.join(path, db_file), name) def list_projects(path: str) -> list[tuple[str, str]]: + """ + List all projects known to the library. + + Parameters + ---------- + path: str + The path of the library. + + Returns + ------- + results: list[Any] + The projects known to the library. 
+ """ db_file = get_db_file(path) get(path, db_file) conn = sqlite3.connect(os.path.join(path, db_file)) diff --git a/corrlib/initialization.py b/corrlib/initialization.py index 8aa8287..bb71db6 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -8,6 +8,10 @@ def _create_db(db: str) -> None: """ Create the database file and the table. + Parameters + ---------- + db: str + Path of the database file. """ conn = sqlite3.connect(db) c = conn.cursor() @@ -38,7 +42,21 @@ def _create_db(db: str) -> None: def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: """ - Create the config file for backlogger. + Create the config file construction for backlogger. + + Parameters + ---------- + path: str + The path of the library to create. + tracker: str + Type of the tracker to use for the library (only DataLad is supported at the moment). + cached: bool + Whether or not the library will create a cache folder for multiple reads when downloaded. + + Returns + ------- + config: ConfigParser + Config parser with the default configuration printed. """ config = ConfigParser() @@ -60,6 +78,13 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: def _write_config(path: str, config: ConfigParser) -> None: """ Write the config file to disk. + + Parameters + ---------- + path: str + The path of the library to create. + config: ConfigParser + The configuration to be used as a ConfigParser, e.g. generated by _create_config. """ with open(os.path.join(path, '.corrlib'), 'w') as configfile: config.write(configfile) @@ -70,6 +95,14 @@ def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None: """ Create folder of backlogs. + Parameters + ---------- + path: str + The path at which the library will be created. + tracker: str, optional + The tracker to use for the library. The default is DataLad, which is also the only one that is supported at the moment. 
+ cached: bool, optional + Whether or not the library will be cached. By default, it does cache already read entries. """ config = _create_config(path, tracker, cached) init(path, tracker) diff --git a/corrlib/main.py b/corrlib/main.py index 24d5103..88b99b3 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -45,9 +45,25 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni conn.commit() conn.close() save(path, message="Added entry for project " + uuid + " to database", files=[db_file]) + return def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] = None) -> None: + """ + Update/Edit a project entry in the database. + Thin wrapper around sqlite3 call. + + Parameters + ---------- + path: str + The path to the backlogger folder. + uuid: str + The uuid of the project. + prop: str + Property of the entry to edit + value: str or None + Value to set `prop` to. + """ db_file = get_db_file(path) get(path, db_file) conn = sqlite3.connect(os.path.join(path, db_file)) @@ -88,6 +104,8 @@ def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: """ + Import a datalad dataset into the backlogger. + Parameters ---------- @@ -95,22 +113,19 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti The url of the project to import. This can be any url that datalad can handle. path: str The path to the backlogger folder. - aliases: list[str] - Custom name of the project, alias of the project. - code: str + owner: str, optional + Person responsible for the maintenance of the project to be imported. + tags: list[str], optional + Custom tags of the imported project. + aliases: list[str], optional + Custom names of the project, alias of the project. 
+ code: str, optional Code that was used to create the measurements. - Import a datalad dataset into the backlogger. - - Parameters - ---------- - path: str - The path to the backlogger directory. - url: str - The url of the project to import. This can be any url that datalad can handle. - Also supported are non-datalad datasets, which will be converted to datalad datasets, - in order to receive a uuid and have a consistent interface. - + Returns + ------- + uuid: str + The unique identifier of the imported project. """ tmp_path = os.path.join(path, 'projects/tmp') clone(path, source=url, target=tmp_path) @@ -144,6 +159,15 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti def drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: """ Drop (parts of) a project to free up diskspace + + Parameters + ---------- + path: str + Path of the library. + uuid: str + The UUID of the project from which data is to be dropped. + path_in_project: str, optional + If set, only the given path within the project is dropped. """ drop(path + "/projects/" + uuid + "/" + path_in_project) return diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a00079e..65a0569 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -27,6 +27,10 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, Measurements to be captured in the backlogging system. uuid: str The uuid of the project. + code: str + Name of the code that was used for the project. + parameter_file: str + The parameter file used for the measurement. 
""" db_file = get_db_file(path) db = os.path.join(path, db_file) @@ -97,6 +101,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, files.append(os.path.join(path, db_file)) conn.close() save(path, message="Add measurements to database", files=files) + return def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: @@ -128,10 +133,13 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { Path of the correlator library. meas_paths: list[str] A list of the paths to the correlator in the backlog system. + perloaded: dict[str, Any] + The data that is already prelaoded. Of interest if data has alread been loaded in the same script. Returns ------- - List + retruned_data: list + The loaded records. """ needed_data: dict[str, list[str]] = {} for mpath in meas_paths: @@ -157,6 +165,20 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { def cache_dir(path: str, file: str) -> str: + """ + Returns the directory corresponding to the cache for the given file. + + Parameters + ---------- + path: str + The path of the library. + file: str + The file in the library that we want to access the cached data of. + Returns + ------- + cache_path: str + The path holding the cached data for the given file. + """ cache_path_list = [path] cache_path_list.append(".cache") cache_path_list.extend(file.split("/")[1:]) @@ -165,11 +187,41 @@ def cache_dir(path: str, file: str) -> str: def cache_path(path: str, file: str, key: str) -> str: + """ + Parameters + ---------- + path: str + The path of the library. + file: str + The file in the library that we want to access the cached data of. + key: str + The key within the archive file. + + Returns + ------- + cache_path: str + The path at which the measurement of the given file and key is cached. 
+ """ cache_path = os.path.join(cache_dir(path, file), key) return cache_path def preload(path: str, file: str) -> dict[str, Any]: + """ + Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method. + + Parameters + ---------- + path: str + The path of the library. + file: str + The file within the library to be loaded. + + Returns + ------- + filedict: dict[str, Any] + The data read from the file. + """ get(path, file) filedict: dict[str, Any] = pj.load_json_dict(os.path.join(path, file)) print("> read file") @@ -177,6 +229,16 @@ def preload(path: str, file: str) -> dict[str, Any]: def drop_record(path: str, meas_path: str) -> None: + """ + Drop a record by its path. + + Parameters + ---------- + path: str + The path of the library. + meas_path: str + The measurement path as noted in the database. + """ file_in_archive = meas_path.split("::")[0] file = os.path.join(path, file_in_archive) db_file = get_db_file(path) @@ -204,6 +266,14 @@ def drop_record(path: str, meas_path: str) -> None: def drop_cache(path: str) -> None: + """ + Drop the cache directory of the library. + + Parameters + ---------- + path: str + The path of the library. + """ cache_dir = os.path.join(path, ".cache") for f in os.listdir(cache_dir): shutil.rmtree(os.path.join(cache_dir, f)) diff --git a/corrlib/toml.py b/corrlib/toml.py index c1c4d5b..629a499 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -22,6 +22,18 @@ from typing import Any def replace_string(string: str, name: str, val: str) -> str: + """ + Replace a placeholder {name} with a value in a string. + + Parameters + ---------- + string: str + String in which the placeholders are to be replaced. + name: str + The name of the placeholder. + val: str + The value the placeholder is to be replaced with. 
+ """ if '{' + name + '}' in string: n = string.replace('{' + name + '}', val) return n @@ -30,7 +42,16 @@ def replace_string(string: str, name: str, val: str) -> str: def replace_in_meas(measurements: dict[str, dict[str, Any]], vars: dict[str, str]) -> dict[str, dict[str, Any]]: - # replace global variables + """ + Replace placeholders in the defiitions for a measurement. + + Parameters + ---------- + measurements: dict[str, dict[str, Any]] + The measurements read from the toml file. + vars: dict[str, str] + Simple key:value dictionary with the keys to be replaced by the values. + """ for name, value in vars.items(): for m in measurements.keys(): for key in measurements[m].keys(): @@ -43,6 +64,16 @@ def replace_in_meas(measurements: dict[str, dict[str, Any]], vars: dict[str, str def fill_cons(measurements: dict[str, dict[str, Any]], constants: dict[str, str]) -> dict[str, dict[str, Any]]: + """ + Fill in defined constants into the measurements of the toml-file. + + Parameters + ---------- + measurements: dict[str, dict[str, Any]] + The measurements read from the toml file. + constants: dict[str, str] + Simple key:value dictionary with the keys to be replaced by the values. + """ for m in measurements.keys(): for name, val in constants.items(): if name not in measurements[m].keys(): @@ -51,6 +82,14 @@ def fill_cons(measurements: dict[str, dict[str, Any]], constants: dict[str, str] def check_project_data(d: dict[str, dict[str, str]]) -> None: + """ + Check the data given in the toml import file for the project we want to import. + + Parameters + ---------- + d: dict + The dictionary holding the data necessary to import the project. 
+ """ if 'project' not in d.keys() or 'measurements' not in d.keys() or len(list(d.keys())) > 4: raise ValueError('There should only be maximally be four keys on the top level, "project" and "measurements" are mandatory, "contants" is optional!') project_data = d['project'] @@ -64,6 +103,16 @@ def check_project_data(d: dict[str, dict[str, str]]) -> None: def check_measurement_data(measurements: dict[str, dict[str, str]], code: str) -> None: + """ + Check syntax of the measurements we want to import. + + Parameters + ---------- + measurements: dict[str, dict[str, str]] + The dictionary holding the necessary data to import the project. + code: str + The code used for the project. + """ var_names: list[str] = [] if code == "sfcf": var_names = ["path", "ensemble", "param_file", "version", "prefix", "cfg_seperator", "names"] @@ -78,8 +127,21 @@ def check_measurement_data(measurements: dict[str, dict[str, str]], code: str) - def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None: + """ + Import multiple toml files. + + Parameters + ---------- + path: str + Path to the backlog directory. + files: list[str] + Path to the description files. + copy_files: bool, optional + Whether the toml-files will be copied into the library. Default is True. + """ for file in files: import_toml(path, file, copy_files) + return def import_toml(path: str, file: str, copy_file: bool=True) -> None: @@ -92,6 +154,8 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: Path to the backlog directory. file: str Path to the description file. + copy_file: bool, optional + Whether the toml-files will be copied into the library. Default is True. """ print("Import project as decribed in " + file) with open(file, 'rb') as fp: @@ -180,6 +244,16 @@ def reimport_project(path: str, uuid: str) -> None: def update_project(path: str, uuid: str) -> None: + """ + Update all entries associated with a given project. 
+ + Parameters + ---------- + path: str + The path of the library. + uuid: str + The unique identifier of the project to be updated. + """ dl.update(how='merge', follow='sibling', dataset=os.path.join(path, "projects", uuid)) # reimport_project(path, uuid) return diff --git a/corrlib/tools.py b/corrlib/tools.py index 9c39d7c..118b094 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -3,26 +3,91 @@ from configparser import ConfigParser from typing import Any CONFIG_FILENAME = ".corrlib" +cached: bool = True def str2list(string: str) -> list[str]: + """ + Convert a comma-separated string to a list. + + Parameters + ---------- + string: str + The string holding a comma-separated list. + + Returns + ------- + s: list[str] + The list of strings that was held by the comma-separated string. + """ return string.split(",") def list2str(mylist: list[str]) -> str: + """ + Convert a list to a comma-separated string. + + Parameters + ---------- + mylist: list[str] + A list of strings to be concatenated. + + Returns + ------- + s: str + The string holding a comma-separated list. + """ s = ",".join(mylist) return s -cached: bool = True - def m2k(m: float) -> float: + """ + Convert the bare quark mass $m$ to the inverse mass parameter $kappa$. + + Parameters + ---------- + m: float + Bare quark mass. + + Returns + ------- + k: float + The corresponding $kappa$. + """ return 1/(2*m+8) def k2m(k: float) -> float: + """ + Convert from the inverse bare quark mass parameter $kappa$ to the bare quark mass $m$. + + Parameters + ---------- + k: float + Inverse bare quark mass parameter $kappa$. + + Returns + ------- + m: float + The corresponding bare quark mass. + """ return (1/(2*k))-4 def set_config(path: str, section: str, option: str, value: Any) -> None: + """ + Set configuration parameters for the library. + + Parameters + ---------- + path: str + The path of the library. + section: str + The section within the configuration file. + option: str + The option to be set to value.
+ value: Any + The value we set the option to. + """ config_path = os.path.join(path, '.corrlib') config = ConfigParser() if os.path.exists(config_path): @@ -36,6 +101,19 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: def get_db_file(path: str) -> str: + """ + Get the database file associated with the library at the given path. + + Parameters + ---------- + path: str + The path of the library. + + Returns + ------- + db_file: str + The file holding the database. + """ config_path = os.path.join(path, CONFIG_FILENAME) config = ConfigParser() if os.path.exists(config_path): @@ -45,6 +123,20 @@ def get_db_file(path: str) -> str: def cache_enabled(path: str) -> bool: + """ + Check whether the library is cached. + Fallback is true. + + Parameters + ---------- + path: str + The path of the library. + + Returns + ------- + cached_bool: bool + Whether the given library is cached. + """ config_path = os.path.join(path, CONFIG_FILENAME) config = ConfigParser() if os.path.exists(config_path):