From 50ff178a0cfd4979c68885cd6249c9a2820ac517 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 2 Sep 2025 10:30:56 +0000 Subject: [PATCH 001/111] import sha1 as groundlayer --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 1a06dac..fca5671 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -6,7 +6,7 @@ from .input import sfcf,openQCD import json from typing import Union from pyerrors import Obs, Corr, dump_object, load_object -from hashlib import sha256 +from hashlib import sha256, sha1 from .tools import cached import shutil From 5e87f569e2a8c085fe942c6774d22039a985056f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 7 Nov 2025 09:43:58 +0000 Subject: [PATCH 002/111] update TODO --- TODO.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/TODO.md b/TODO.md index 4153fc3..85074f8 100644 --- a/TODO.md +++ b/TODO.md @@ -1,14 +1,17 @@ # TODO ## Features -- implement import of non-datalad projects -- implement a way to use another backlog repo as a project - -- find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project - - this could e.g. be done along the lines of mandatory documentation -- keep better track of the versions of the code, that was used for a specific measurement. - - maybe let this be an input in the project file? - - git repo and commit hash/version tag - +- [ ] implement import of non-datalad projects +- [ ] implement a way to use another backlog repo as a project +- [ ] make cache deadlock resistent (no read while writing) +- [ ] find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project + - [ ] this could e.g. be done along the lines of mandatory documentation +- [ ] keep better track of the versions of the code, that was used for a specific measurement. 
+ - [ ] maybe let this be an input in the project file? + - [ ] git repo and commit hash/version tag + - [ ] implement a code table? +- [ ] parallel processing of measurements +- [ ] extra SQL table for ensembles with UUID and aliases ## Bugfixes - [ ] revisit the reimport function for single files +- [ ] drop record needs to look if no records are left in a json file. From 085256857d5f3cefc413a6a4364354b886f1e419 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:07:16 +0100 Subject: [PATCH 003/111] add test notebook to ignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1530e48..e7385f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ pyerrors_corrlib.egg-info __pycache__ -*.egg-info \ No newline at end of file +*.egg-info +test.ipynb From 73d7687359b4d8f1b5e8351f6700fff8be0a6b4e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:08:03 +0100 Subject: [PATCH 004/111] update TODO and README --- README.md | 9 +++++++++ TODO.md | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/README.md b/README.md index 976ae57..0f6c9a3 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,12 @@ This is done in a reproducible way using `datalad`. In principle, a dataset is created, that is automatically administered by the backlogger, in which data from differnt projects are held together. Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements. The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methonds of pyerrors. + +We work with the following nomenclature in this project: +- Measurement + A setis of Observables, including the appropriate metadata. +- Project + A series of measurements that was done by one person as part of their research. +- Record + An entry of a single Correlator in the database of the backlogger. 
+- \ No newline at end of file diff --git a/TODO.md b/TODO.md index 85074f8..ba32ec9 100644 --- a/TODO.md +++ b/TODO.md @@ -15,3 +15,7 @@ ## Bugfixes - [ ] revisit the reimport function for single files - [ ] drop record needs to look if no records are left in a json file. + +## Rough Ideas +- [ ] multitable could provide a high speed implementation of an HDF5 based format +- [ ] implement also a way to include compiled binaries in the archives. From 91c7a9d95de7e5fb9be4f357798158226f31240a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:11:37 +0100 Subject: [PATCH 005/111] clean up --- projects/tmp | 1 - tests/test_import_project.py | 0 2 files changed, 1 deletion(-) delete mode 160000 projects/tmp delete mode 100644 tests/test_import_project.py diff --git a/projects/tmp b/projects/tmp deleted file mode 160000 index 216fe4e..0000000 --- a/projects/tmp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 216fe4ed3467ed486390735f8072856cf3d0a409 diff --git a/tests/test_import_project.py b/tests/test_import_project.py deleted file mode 100644 index e69de29..0000000 From 4f3e78177e0497ff344f4d949cb8af8cbfb6c832 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:13:17 +0100 Subject: [PATCH 006/111] refactor io --- corrlib/cache_io.py | 33 +++++++++++++++++++++++++++++++++ corrlib/meas_io.py | 31 +++++++------------------------ corrlib/tools.py | 6 ++++++ 3 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 corrlib/cache_io.py diff --git a/corrlib/cache_io.py b/corrlib/cache_io.py new file mode 100644 index 0000000..4d1d632 --- /dev/null +++ b/corrlib/cache_io.py @@ -0,0 +1,33 @@ +from typing import Union, Optional +import os +import shutil + + +def drop_cache_files(path: str, fs: Optional[list[str]]=None): + cache_dir = os.path.join(path, ".cache") + if fs is None: + fs = os.listdir(cache_dir) + for f in fs: + shutil.rmtree(os.path.join(cache_dir, f)) + + +def cache_dir(path, file): + cache_path_list = [path] + 
cache_path_list.append(".cache") + cache_path_list.extend(file.split("/")[1:]) + cache_path = "/".join(cache_path_list) + return cache_path + + +def cache_path(path, file, hash, key): + cache_path = os.path.join(cache_dir(path, file), hash, key) + return cache_path + +def is_in_cache(path, record, hash): + + if os.file.exists(cache_path(path, file, hash, key)): + return True + else: + return False + + \ No newline at end of file diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index fca5671..ad9a6e8 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -4,14 +4,15 @@ import datalad.api as dl import sqlite3 from .input import sfcf,openQCD import json -from typing import Union +from typing import Union, Optional from pyerrors import Obs, Corr, dump_object, load_object from hashlib import sha256, sha1 -from .tools import cached +from .tools import cached, record2name_key import shutil +from .caching import cache_path, cache_dir -def write_measurement(path, ensemble, measurement, uuid, code, parameter_file=None): +def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None): """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. @@ -115,7 +116,7 @@ def load_record(path: str, meas_path: str): return load_records(path, [meas_path])[0] -def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union[Corr, Obs]]: +def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. 
@@ -131,11 +132,10 @@ def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union List """ needed_data: dict[str, list[str]] = {} - for mpath in meas_paths: - file = mpath.split("::")[0] + for rpath in record_paths: + file, key = record2name_key(rpath) if file not in needed_data.keys(): needed_data[file] = [] - key = mpath.split("::")[1] needed_data[file].append(key) returned_data: list = [] for file in needed_data.keys(): @@ -153,19 +153,6 @@ def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union return returned_data -def cache_dir(path, file): - cache_path_list = [path] - cache_path_list.append(".cache") - cache_path_list.extend(file.split("/")[1:]) - cache_path = "/".join(cache_path_list) - return cache_path - - -def cache_path(path, file, key): - cache_path = os.path.join(cache_dir(path, file), key) - return cache_path - - def preload(path: str, file: str): dl.get(os.path.join(path, file), dataset=path) filedict = pj.load_json_dict(os.path.join(path, file)) @@ -196,7 +183,3 @@ def drop_record(path: str, meas_path: str): else: raise ValueError("This measurement does not exist as a file!") -def drop_cache(path: str): - cache_dir = os.path.join(path, ".cache") - for f in os.listdir(cache_dir): - shutil.rmtree(os.path.join(cache_dir, f)) diff --git a/corrlib/tools.py b/corrlib/tools.py index 3ac8bfe..44697cc 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -16,3 +16,9 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 + + +def record2name_key(record_path: str): + file = record_path.split("::")[0] + key = record_path.split("::")[1] + return file, key \ No newline at end of file From a080ca835f7d22299bb7c21b6d9dea710549aaf3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 11:53:25 +0100 Subject: [PATCH 007/111] write wrapper hash method --- corrlib/tools.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 44697cc..70eb518 
100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,5 +1,5 @@ - - +import hashlib +import os def str2list(string): return string.split(",") @@ -21,4 +21,11 @@ def k2m(k): def record2name_key(record_path: str): file = record_path.split("::")[0] key = record_path.split("::")[1] - return file, key \ No newline at end of file + return file, key + + +def make_version_hash(path, record): + file, key = record2name_key(record) + with open(os.path.join(path, file), 'rb') as fp: + file_hash = hashlib.file_digest(fp, 'sha1').hexdigest() + return file_hash From f47a9caae7c55dbcb83c430262b458b5ef78e33c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:46:15 +0100 Subject: [PATCH 008/111] include a test dataset --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e7385f6..dd21dea 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ pyerrors_corrlib.egg-info __pycache__ *.egg-info test.ipynb +test_ds From df71ee5ad9f2083668f2a2c75831858917ba1626 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:47:45 +0100 Subject: [PATCH 009/111] add gitpython --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index add6910..6b8794e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup(name='pycorrlib', version=version['__version__'], author='Justus Kuhlmann', author_email='j_kuhl19@uni-muenster.de', - install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5'], + install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'], entry_points = { 'console_scripts': ['pcl=corrlib.cli:app'], }, From c9fe09d9d6f0baa1a8cb54e405a3bae9ade91783 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:50:59 +0100 Subject: [PATCH 010/111] add functionality to automatically register when cache has an old version of an archived file --- corrlib/__init__.py | 1 + corrlib/cache_io.py | 45 
++++++++++++++++++++++++++++++--------- corrlib/cli.py | 4 ++-- corrlib/initialization.py | 3 ++- corrlib/meas_io.py | 41 ++++++++++++++++++++++------------- 5 files changed, 66 insertions(+), 28 deletions(-) diff --git a/corrlib/__init__.py b/corrlib/__init__.py index 91b07f4..41d8691 100644 --- a/corrlib/__init__.py +++ b/corrlib/__init__.py @@ -19,5 +19,6 @@ from .main import * from .import input as input from .initialization import * from .meas_io import * +from .cache_io import * from .find import * from .version import __version__ diff --git a/corrlib/cache_io.py b/corrlib/cache_io.py index 4d1d632..c890164 100644 --- a/corrlib/cache_io.py +++ b/corrlib/cache_io.py @@ -1,6 +1,19 @@ from typing import Union, Optional import os import shutil +from .tools import record2name_key +from pyerrors import dump_object +import datalad.api as dl +import sqlite3 + + +def get_version_hash(path, record): + db = os.path.join(path, "backlogger.db") + dl.get(db, dataset=path) + conn = sqlite3.connect(db) + c = conn.cursor() + c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'") + return c.fetchall()[0][0] def drop_cache_files(path: str, fs: Optional[list[str]]=None): @@ -19,15 +32,27 @@ def cache_dir(path, file): return cache_path -def cache_path(path, file, hash, key): - cache_path = os.path.join(cache_dir(path, file), hash, key) +def cache_path(path, file, sha_hash, key): + cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash) return cache_path -def is_in_cache(path, record, hash): - - if os.file.exists(cache_path(path, file, hash, key)): - return True - else: - return False - - \ No newline at end of file + +def is_old_version(path, record): + version_hash = get_version_hash(path, record) + file, key = record2name_key(record) + meas_cache_path = os.path.join(cache_dir(path, file)) + ls = [] + for p, ds, fs in os.walk(meas_cache_path): + ls.extend(fs) + for filename in ls: + if key == filename.split("_")[0]: + if not version_hash == 
filename.split("_")[1][:-2]: + return True + else: + return False + + +def is_in_cache(path, record): + version_hash = get_version_hash(path, record) + file, key = record2name_key(record) + return os.path.exists(cache_path(path, file, version_hash, key) + ".p") diff --git a/corrlib/cli.py b/corrlib/cli.py index b808c13..44ede1b 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -6,7 +6,7 @@ from .toml import import_tomls, update_project, reimport_project from .find import find_record, list_projects from .tools import str2list from .main import update_aliases -from .meas_io import drop_cache as mio_drop_cache +from .cache_io import drop_cache_files as cio_drop_cache_files import os @@ -171,7 +171,7 @@ def drop_cache( """ Drop the currect cache directory of the dataset. """ - mio_drop_cache(path) + cio_drop_cache_files(path) return diff --git a/corrlib/initialization.py b/corrlib/initialization.py index f6ef5aa..e5c0ede 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -21,7 +21,8 @@ def _create_db(db): parameters TEXT, parameter_file TEXT, created_at TEXT, - updated_at TEXT)''') + updated_at TEXT, + current_version TEXT)''') c.execute('''CREATE TABLE IF NOT EXISTS projects (id TEXT PRIMARY KEY, aliases TEXT, diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index ad9a6e8..ff7cdc8 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -5,11 +5,10 @@ import sqlite3 from .input import sfcf,openQCD import json from typing import Union, Optional -from pyerrors import Obs, Corr, dump_object, load_object -from hashlib import sha256, sha1 -from .tools import cached, record2name_key -import shutil -from .caching import cache_path, cache_dir +from pyerrors import Obs, Corr, load_object, dump_object +from hashlib import sha256 +from .tools import cached, record2name_key, make_version_hash +from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: 
Optional[str]=None): @@ -79,11 +78,13 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O subkey = "/".join(par_list) subkeys = [subkey] pars[subkey] = json.dumps(parameters) - for subkey in subkeys: - parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + parHash - known_meas[parHash] = measurement[corr][subkey] + meas_paths = [] + for subkey in subkeys: + par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() + meas_path = file_in_archive + "::" + par_hash + meas_paths.append(meas_path) + known_meas[par_hash] = measurement[corr][subkey] if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) @@ -92,7 +93,12 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() pj.dump_dict_to_json(known_meas, file) - files.append(path + '/backlogger.db') + for meas_path in meas_paths: + version_hash = make_version_hash(path, meas_path) + print(version_hash) + c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? 
and name = ?", (version_hash, uuid, code, corr)) + conn.commit() + files.append(db) conn.close() dl.save(files, message="Add measurements to database", dataset=path) @@ -140,16 +146,21 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni returned_data: list = [] for file in needed_data.keys(): for key in list(needed_data[file]): - if os.path.exists(cache_path(path, file, key) + ".p"): - returned_data.append(load_object(cache_path(path, file, key) + ".p")) + record = file + "::" + key + current_version = get_version_hash(path, record) + if is_in_cache(path, record): + returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p")) else: if file not in preloaded: preloaded[file] = preload(path, file) returned_data.append(preloaded[file][key]) if cached: - if not os.path.exists(cache_dir(path, file)): - os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], cache_path(path, file, key)) + if not is_in_cache(path, record): + file, key = record2name_key(record) + if not os.path.exists(cache_dir(path, file)): + os.makedirs(cache_dir(path, file)) + current_version = get_version_hash(path, record) + dump_object(preloaded[file][key], cache_path(path, file, current_version, key)) return returned_data From 64579c477cd68e29cd059bf4750a026bc04e01c9 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 27 Nov 2025 11:07:55 +0100 Subject: [PATCH 011/111] better db call order --- corrlib/meas_io.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index ff7cdc8..b8695ca 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -6,7 +6,7 @@ from .input import sfcf,openQCD import json from typing import Union, Optional from pyerrors import Obs, Corr, load_object, dump_object -from hashlib import sha256 +from hashlib import sha256, sha1 from .tools import cached, record2name_key, make_version_hash from .cache_io import is_in_cache, 
cache_path, cache_dir, get_version_hash @@ -85,18 +85,12 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O meas_path = file_in_archive + "::" + par_hash meas_paths.append(meas_path) known_meas[par_hash] = measurement[corr][subkey] - - if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: - c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) - else: - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + data_hash = sha1(pj.create_json_string(measurement[corr][subkey]).encode('UTF-8')).hexdigest() + if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None: + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) - conn.commit() + c.execute("UPDATE backlogs SET current_version = ?, updated_at = datetime('now') WHERE path = ?", (data_hash, meas_path)) pj.dump_dict_to_json(known_meas, file) - for meas_path in meas_paths: - version_hash = make_version_hash(path, meas_path) - print(version_hash) - c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? 
and name = ?", (version_hash, uuid, code, corr)) conn.commit() files.append(db) conn.close() From 5bd94633e86080e2763fd82b509bbccc6fa8b2c0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 28 Nov 2025 16:42:50 +0100 Subject: [PATCH 012/111] centralize file and key to record concat and back --- corrlib/meas_io.py | 11 +++++------ corrlib/tools.py | 4 ++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index b8695ca..a78f6f2 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -7,7 +7,7 @@ import json from typing import Union, Optional from pyerrors import Obs, Corr, load_object, dump_object from hashlib import sha256, sha1 -from .tools import cached, record2name_key, make_version_hash +from .tools import cached, record2name_key, name_key2record, make_version_hash from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash @@ -82,10 +82,10 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O meas_paths = [] for subkey in subkeys: par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + par_hash + meas_path = name_key2record(file_in_archive, par_hash) meas_paths.append(meas_path) known_meas[par_hash] = measurement[corr][subkey] - data_hash = sha1(pj.create_json_string(measurement[corr][subkey]).encode('UTF-8')).hexdigest() + data_hash = make_version_hash(path, meas_path) if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None: c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) @@ -140,7 +140,7 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni returned_data: list = [] for file in needed_data.keys(): for key in list(needed_data[file]): - record = file + "::" + key + 
record = name_key2record(file, key) current_version = get_version_hash(path, record) if is_in_cache(path, record): returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p")) @@ -165,10 +165,9 @@ def preload(path: str, file: str): def drop_record(path: str, meas_path: str): - file_in_archive = meas_path.split("::")[0] + file_in_archive, sub_key = record2name_key(meas_path) file = os.path.join(path, file_in_archive) db = os.path.join(path, 'backlogger.db') - sub_key = meas_path.split("::")[1] dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() diff --git a/corrlib/tools.py b/corrlib/tools.py index 70eb518..43ab1ba 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -24,6 +24,10 @@ def record2name_key(record_path: str): return file, key +def name_key2record(name: str, key: str): + return name + "::" + key + + def make_version_hash(path, record): file, key = record2name_key(record) with open(os.path.join(path, file), 'rb') as fp: From 7e38d71b90e13927bea8d56511b8ff809ecde2dd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 28 Nov 2025 16:57:15 +0100 Subject: [PATCH 013/111] re-add some tools functions --- corrlib/tools.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/corrlib/tools.py b/corrlib/tools.py index 337912e..e8a9c18 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -17,3 +17,20 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 + + +def record2name_key(record_path: str): + file = record_path.split("::")[0] + key = record_path.split("::")[1] + return file, key + + +def name_key2record(name: str, key: str): + return name + "::" + key + + +def make_version_hash(path, record): + file, key = record2name_key(record) + with open(os.path.join(path, file), 'rb') as fp: + file_hash = hashlib.file_digest(fp, 'sha1').hexdigest() + return file_hash From e07f2ef9b0b67823eee1d0fcf505f92be009f86f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 28 Nov 2025 17:07:36 +0100 Subject: 
[PATCH 014/111] re-add get_file method --- corrlib/tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/corrlib/tools.py b/corrlib/tools.py index e8a9c18..82c45fb 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -19,6 +19,15 @@ def k2m(k): return (1/(2*k))-4 +def get_file(path, file): + if file == "backlogger.db": + print("Downloading database...") + else: + print("Downloading data...") + dl.get(os.path.join(path, file), dataset=path) + print("> downloaded file") + + def record2name_key(record_path: str): file = record_path.split("::")[0] key = record_path.split("::")[1] From 14d19ce9dd47ba9e3bf57377f3b3bb64d8b7cb5a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 18 Feb 2026 10:59:12 +0100 Subject: [PATCH 015/111] add typing for tests --- tests/cli_test.py | 13 +++++++------ tests/import_project_test.py | 2 +- tests/sfcf_in_test.py | 2 +- tests/test_initialization.py | 9 +++++---- tests/tools_test.py | 10 +++++----- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/tests/cli_test.py b/tests/cli_test.py index a6b0bd7..f1678c6 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -2,18 +2,19 @@ from typer.testing import CliRunner from corrlib.cli import app import os import sqlite3 as sql +from pathlib import Path runner = CliRunner() -def test_version(): +def test_version() -> None: result = runner.invoke(app, ["--version"]) assert result.exit_code == 0 assert "corrlib" in result.output -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 @@ -21,7 +22,7 @@ def test_init_folders(tmp_path): assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code 
== 0 @@ -37,7 +38,7 @@ def test_init_db(tmp_path): table_names = [table[0] for table in tables] for expected_table in expected_tables: assert expected_table in table_names - + cursor.execute("SELECT * FROM projects;") projects = cursor.fetchall() assert len(projects) == 0 @@ -60,7 +61,7 @@ def test_init_db(tmp_path): project_column_names = [col[1] for col in project_columns] for expected_col in expected_project_columns: assert expected_col in project_column_names - + cursor.execute("PRAGMA table_info('backlogs');") backlog_columns = cursor.fetchall() expected_backlog_columns = [ @@ -81,7 +82,7 @@ def test_init_db(tmp_path): assert expected_col in backlog_column_names -def test_list(tmp_path): +def test_list(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 diff --git a/tests/import_project_test.py b/tests/import_project_test.py index 2dea06f..685d2cf 100644 --- a/tests/import_project_test.py +++ b/tests/import_project_test.py @@ -1,7 +1,7 @@ import corrlib.toml as t -def test_toml_check_measurement_data(): +def test_toml_check_measurement_data() -> None: measurements = { "a": { diff --git a/tests/sfcf_in_test.py b/tests/sfcf_in_test.py index 72921e7..5e4ff83 100644 --- a/tests/sfcf_in_test.py +++ b/tests/sfcf_in_test.py @@ -1,7 +1,7 @@ import corrlib.input.sfcf as input import json -def test_get_specs(): +def test_get_specs() -> None: parameters = { 'crr': [ 'f_P', 'f_A' diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 1ea0ece..9284c82 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -1,22 +1,23 @@ import corrlib.initialization as init import os import sqlite3 as sql +from pathlib import Path -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path)) assert os.path.exists(str(dataset_path)) assert 
os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_folders_no_tracker(tmp_path): +def test_init_folders_no_tracker(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_config(tmp_path): +def test_init_config(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") config_path = dataset_path / ".corrlib" @@ -34,7 +35,7 @@ def test_init_config(tmp_path): assert config.get("paths", "import_scripts_path") == "import_scripts" -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) diff --git a/tests/tools_test.py b/tests/tools_test.py index ee76f1c..88dbffa 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -3,29 +3,29 @@ from corrlib import tools as tl -def test_m2k(): +def test_m2k() -> None: for m in [0.1, 0.5, 1.0]: expected_k = 1 / (2 * m + 8) assert tl.m2k(m) == expected_k -def test_k2m(): +def test_k2m() -> None: for m in [0.1, 0.5, 1.0]: assert tl.k2m(m) == (1/(2*m))-4 -def test_k2m_m2k(): +def test_k2m_m2k() -> None: for m in [0.1, 0.5, 1.0]: k = tl.m2k(m) m_converted = tl.k2m(k) assert abs(m - m_converted) < 1e-9 -def test_str2list(): +def test_str2list() -> None: assert tl.str2list("a,b,c") == ["a", "b", "c"] assert tl.str2list("1,2,3") == ["1", "2", "3"] -def test_list2str(): +def test_list2str() -> None: assert tl.list2str(["a", "b", "c"]) == "a,b,c" assert tl.list2str(["1", "2", "3"]) == "1,2,3" From 3d91509ab6e072537c225d1f63991a8542683a95 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 09:42:28 +0100 Subject: [PATCH 016/111] rename getter for the database file name --- corrlib/cache_io.py | 28 ++++++++++++++-------------- 
corrlib/find.py | 8 ++++---- corrlib/main.py | 10 +++++----- corrlib/meas_io.py | 10 +++++----- corrlib/tracker.py | 4 ++-- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/corrlib/cache_io.py b/corrlib/cache_io.py index c890164..63d2e68 100644 --- a/corrlib/cache_io.py +++ b/corrlib/cache_io.py @@ -1,22 +1,22 @@ -from typing import Union, Optional +from typing import Optional import os import shutil from .tools import record2name_key -from pyerrors import dump_object import datalad.api as dl import sqlite3 +from tools import db_filename -def get_version_hash(path, record): - db = os.path.join(path, "backlogger.db") +def get_version_hash(path: str, record: str) -> str: + db = os.path.join(path, db_filename(path)) dl.get(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'") - return c.fetchall()[0][0] + return str(c.fetchall()[0][0]) -def drop_cache_files(path: str, fs: Optional[list[str]]=None): +def drop_cache_files(path: str, fs: Optional[list[str]]=None) -> None: cache_dir = os.path.join(path, ".cache") if fs is None: fs = os.listdir(cache_dir) @@ -24,7 +24,7 @@ def drop_cache_files(path: str, fs: Optional[list[str]]=None): shutil.rmtree(os.path.join(cache_dir, f)) -def cache_dir(path, file): +def cache_dir(path: str, file: str) -> str: cache_path_list = [path] cache_path_list.append(".cache") cache_path_list.extend(file.split("/")[1:]) @@ -32,27 +32,27 @@ def cache_dir(path, file): return cache_path -def cache_path(path, file, sha_hash, key): +def cache_path(path: str, file: str, sha_hash: str, key: str) -> str: cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash) return cache_path -def is_old_version(path, record): +def is_old_version(path: str, record: str) -> bool: version_hash = get_version_hash(path, record) file, key = record2name_key(record) meas_cache_path = os.path.join(cache_dir(path, file)) ls = [] + is_old = True for p, ds, fs in 
os.walk(meas_cache_path): ls.extend(fs) for filename in ls: if key == filename.split("_")[0]: - if not version_hash == filename.split("_")[1][:-2]: - return True - else: - return False + if version_hash == filename.split("_")[1][:-2]: + is_old = False + return is_old -def is_in_cache(path, record): +def is_in_cache(path: str, record: str) -> bool: version_hash = get_version_hash(path, record) file, key = record2name_key(record) return os.path.exists(cache_path(path, file, version_hash, key) + ".p") diff --git a/corrlib/find.py b/corrlib/find.py index 21063ec..5d0a678 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -4,7 +4,7 @@ import json import pandas as pd import numpy as np from .input.implementations import codes -from .tools import k2m, get_db_file +from .tools import k2m, db_filename from .tracker import get from typing import Any, Optional @@ -230,7 +230,7 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: - db_file = get_db_file(path) + db_file = db_filename(path) db = os.path.join(path, db_file) if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) @@ -262,7 +262,7 @@ def find_project(path: str, name: str) -> str: uuid: str The uuid of the project in question. """ - db_file = get_db_file(path) + db_file = db_filename(path) get(path, db_file) return _project_lookup_by_alias(os.path.join(path, db_file), name) @@ -281,7 +281,7 @@ def list_projects(path: str) -> list[tuple[str, str]]: results: list[Any] The projects known to the library. 
""" - db_file = get_db_file(path) + db_file = db_filename(path) get(path, db_file) conn = sqlite3.connect(os.path.join(path, db_file)) c = conn.cursor() diff --git a/corrlib/main.py b/corrlib/main.py index 88b99b3..df0cd7a 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -5,7 +5,7 @@ import os from .git_tools import move_submodule import shutil from .find import _project_lookup_by_id -from .tools import list2str, str2list, get_db_file +from .tools import list2str, str2list, db_filename from .tracker import get, save, unlock, clone, drop from typing import Union, Optional @@ -25,7 +25,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni code: str (optional) The code that was used to create the measurements. """ - db_file = get_db_file(path) + db_file = db_filename(path) db = os.path.join(path, db_file) get(path, db_file) conn = sqlite3.connect(db) @@ -64,7 +64,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] value: str or None Value to se `prop` to. 
""" - db_file = get_db_file(path) + db_file = db_filename(path) get(path, db_file) conn = sqlite3.connect(os.path.join(path, db_file)) c = conn.cursor() @@ -75,7 +75,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: - db_file = get_db_file(path) + db_file = db_filename(path) db = os.path.join(path, db_file) get(path, db_file) known_data = _project_lookup_by_id(db, uuid)[0] @@ -135,7 +135,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti if not uuid: raise ValueError("The dataset does not have a uuid!") if not os.path.exists(path + "/projects/" + uuid): - db_file = get_db_file(path) + db_file = db_filename(path) get(path, db_file) unlock(path, db_file) create_project(path, uuid, owner, tags, aliases, code) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 0f77638..3344efb 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -3,12 +3,12 @@ import os import sqlite3 from .input import sfcf,openQCD import json -from typing import Union, Optional,Any +from typing import Union, Any from pyerrors import Obs, Corr, load_object, dump_object -from hashlib import sha256, sha1 +from hashlib import sha256 from .tools import record2name_key, name_key2record, make_version_hash from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash -from .tools import get_db_file, cache_enabled +from .tools import db_filename, cache_enabled from .tracker import get, save, unlock import shutil @@ -33,7 +33,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, parameter_file: str The parameter file used for the measurement. 
""" - db_file = get_db_file(path) + db_file = db_filename(path) db = os.path.join(path, db_file) get(path, db_file) unlock(path, db_file) @@ -204,7 +204,7 @@ def drop_record(path: str, meas_path: str) -> None: """ file_in_archive = meas_path.split("::")[0] file = os.path.join(path, file_in_archive) - db_file = get_db_file(path) + db_file = db_filename(path) db = os.path.join(path, db_file) get(path, db_file) sub_key = meas_path.split("::")[1] diff --git a/corrlib/tracker.py b/corrlib/tracker.py index 5cc281c..63aabf2 100644 --- a/corrlib/tracker.py +++ b/corrlib/tracker.py @@ -3,7 +3,7 @@ from configparser import ConfigParser import datalad.api as dl from typing import Optional import shutil -from .tools import get_db_file +from .tools import db_filename def get_tracker(path: str) -> str: @@ -43,7 +43,7 @@ def get(path: str, file: str) -> None: """ tracker = get_tracker(path) if tracker == 'datalad': - if file == get_db_file(path): + if file == db_filename(path): print("Downloading database...") else: print("Downloading data...") From 6ea1827b99fbb250aa9680c575094a9533d8eff0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 09:44:22 +0100 Subject: [PATCH 017/111] add getter for cache_dir_name and rename db filename getter --- corrlib/tools.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 6dc6aec..e46ce0a 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,8 +1,7 @@ import os -import datalad.api as dl import hashlib from configparser import ConfigParser -from typing import Any +from typing import Any, Union CONFIG_FILENAME = ".corrlib" cached: bool = True @@ -77,16 +76,6 @@ def k2m(k: float) -> float: return (1/(2*k))-4 -def get_file(path: str, file: str) -> None: - if file == get_db_file(path): - print("Downloading database...") - else: - print("Downloading data...") - dl.get(os.path.join(path, file), dataset=path) - print("> 
downloaded file") - return - - def record2name_key(record_path: str) -> tuple[str, str]: """ Convert a record to a pair of name and key. @@ -155,7 +144,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: return -def get_db_file(path: str) -> str: +def db_filename(path: str) -> str: """ Get the database file associated with the library at the given path. @@ -199,3 +188,28 @@ def cache_enabled(path: str) -> bool: cached_str = config.get('core', 'cached', fallback='True') cached_bool = cached_str == ('True') return cached_bool + + +def cache_dir_name(path: str) -> Union[str, None]: + """ + Get the database file associated with the library at the given path. + + Parameters + ---------- + path: str + The path of the library. + + Returns + ------- + db_file: str + The file holding the database. + """ + config_path = os.path.join(path, CONFIG_FILENAME) + config = ConfigParser() + if os.path.exists(config_path): + config.read(config_path) + if cache_enabled(path): + cache = config.get('paths', 'cache', fallback='.cache') + else: + cache = None + return cache From f8b70f07c9bd38b43fdcc841be9fbbc2db7a8b8a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 09:45:05 +0100 Subject: [PATCH 018/111] add cache dir name to config --- corrlib/initialization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/corrlib/initialization.py b/corrlib/initialization.py index 37ab3e5..0b7be48 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -72,6 +72,7 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: 'archive_path': 'archive', 'toml_imports_path': 'toml_imports', 'import_scripts_path': 'import_scripts', + 'cache_path': '.cache', } return config From 5e1be236ee6074c730df2579023f7c7d182293f5 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 09:47:12 +0100 Subject: [PATCH 019/111] TEMPFIX: add tools --- corrlib/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/corrlib/__init__.py b/corrlib/__init__.py index 4e1b364..448b4d5 100644 --- a/corrlib/__init__.py +++ b/corrlib/__init__.py @@ -22,3 +22,4 @@ from .meas_io import load_records as load_records from .find import find_project as find_project from .find import find_record as find_record from .find import list_projects as list_projects +from .tools import * From 0d6ad8f552483c49a1afa025fa17f53eefcba39e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 10:17:15 +0100 Subject: [PATCH 020/111] add a simple method to show the statistics of a record --- corrlib/cli.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 414fcc4..f205026 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -7,6 +7,7 @@ from .find import find_record, list_projects from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache +from .meas_io import load_record as mio_load_record import os from importlib.metadata import version @@ -35,6 +36,7 @@ def update( update_project(path, uuid) return + @app.command() def list( path: str = typer.Option( @@ -94,12 +96,39 @@ def find( ensemble: str = typer.Argument(), corr: str = typer.Argument(), code: str = typer.Argument(), + arg: str = typer.Option( + str('all'), + "--argument", + "-a", + ), ) -> None: """ Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. """ results = find_record(path, ensemble, corr, code) - print(results) + if arg == 'all': + print(results) + else: + for r in results[arg].values: + print(r) + + +@app.command() +def stat( + path: str = typer.Option( + str('./corrlib'), + "--dataset", + "-d", + ), + record: str = typer.Argument(), + ) -> None: + """ + Show the statistics of a given record. 
+ """ + record = mio_load_record(path, record)[0] + statistics = record.idl + print(statistics) + return @app.command() From 60b56dfb25db30322b973684e42fbbc07993772d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 16:54:14 +0100 Subject: [PATCH 021/111] fix the file finder for sfcf --- corrlib/input/sfcf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 6a75b72..621f736 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -3,6 +3,7 @@ import datalad.api as dl import json import os from typing import Any +from fnmatch import fnmatch bi_corrs: list[str] = ["f_P", "fP", "f_p", @@ -298,9 +299,10 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: if not appended: compact = (version[-1] == "c") for i, item in enumerate(ls): - rep_path = directory + '/' + item - sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) - files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) + if fnmatch(item, prefix + "*"): + rep_path = directory + '/' + item + sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) + files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) print("Getting data, this might take a while...") From 875d7b9461ef9b763853e435c24d1fe2ed3d036c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Feb 2026 09:45:00 +0100 Subject: [PATCH 022/111] write explicit setup-uv link --- .github/workflows/mypy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 791243f..a75fa57 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -20,7 +20,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: https://github.com/astral-sh/setup-uv@v7 with: python-version: ${{ matrix.python-version }} enable-cache: true From 
540160c51f4c998341b6441f984d8e7b0bfdc2fd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Feb 2026 09:14:12 +0100 Subject: [PATCH 023/111] use older setup-uv action --- .github/workflows/mypy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index a75fa57..0add4e9 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -14,13 +14,13 @@ jobs: - name: Install git-annex run: | sudo apt-get update - sudo apt-get install -y git-annex + sudo apt-get install -y git-annex - name: Check out the repository uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v7 + uses: https://github.com/astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 373f3476c070c0ff116f5a29f51c068a099a36f2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Feb 2026 09:18:51 +0100 Subject: [PATCH 024/111] explicit install-uv version --- .github/workflows/mypy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 0add4e9..c2a36c4 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -20,7 +20,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v6.8.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From b2ac8939a33087260a74d375b97a040d8c40640f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Feb 2026 11:20:28 +0100 Subject: [PATCH 025/111] fix: cli show stat failed for single values --- corrlib/cli.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index f205026..5ceb5e3 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -9,6 +9,7 @@ from .main import 
update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record import os +from pyerrors import Corr from importlib.metadata import version @@ -120,12 +121,14 @@ def stat( "--dataset", "-d", ), - record: str = typer.Argument(), + record_id: str = typer.Argument(), ) -> None: """ Show the statistics of a given record. """ - record = mio_load_record(path, record)[0] + record = mio_load_record(path, record_id) + if isinstance(record, Corr): + record = record[0] statistics = record.idl print(statistics) return From b51a69bc69b4d634e43adcf86f54fcc3e5201a98 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 6 Mar 2026 15:35:49 +0100 Subject: [PATCH 026/111] fix file unlock --- corrlib/tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/tracker.py b/corrlib/tracker.py index 5cc281c..e535b03 100644 --- a/corrlib/tracker.py +++ b/corrlib/tracker.py @@ -114,7 +114,7 @@ def unlock(path: str, file: str) -> None: """ tracker = get_tracker(path) if tracker == 'datalad': - dl.unlock(file, dataset=path) + dl.unlock(os.path.join(path, file), dataset=path) elif tracker == 'None': Warning("Tracker 'None' does not implement unlock.") pass From 6b2db911bf7ee11e19f442e425a9ee48c28e3969 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:56:27 +0100 Subject: [PATCH 027/111] add list for stat types --- corrlib/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 5ceb5e3..4e1b65e 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -39,7 +39,7 @@ def update( @app.command() -def list( +def lister( path: str = typer.Option( str('./corrlib'), "--dataset", @@ -127,7 +127,7 @@ def stat( Show the statistics of a given record. 
""" record = mio_load_record(path, record_id) - if isinstance(record, Corr): + if isinstance(record, (list, Corr)): record = record[0] statistics = record.idl print(statistics) From a9cc2b3f48199877eedd40f87db27dffe0a73251 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:57:48 +0100 Subject: [PATCH 028/111] fix write measurement call and reporting to user --- corrlib/toml.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index 629a499..feafaf6 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -189,7 +189,6 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: measurement = sfcf.read_data(path, uuid, md['path'], md['prefix'], param, version=md['version'], cfg_seperator=md['cfg_seperator'], sep='/') - print(mname + " imported.") elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': param = openQCD.read_ms1_param(path, uuid, md['param_file']) @@ -211,8 +210,8 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: param['type'] = 't1' measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) - - write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else '')) + write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) + print(mname + " imported.") if not os.path.exists(os.path.join(path, "toml_imports", uuid)): os.makedirs(os.path.join(path, "toml_imports", uuid)) From 96731baeb9e9db0c5002c1cdbc3ca1dfb75ca52b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:59:04 +0100 Subject: [PATCH 029/111] fix when files are unlocked or saved --- corrlib/meas_io.py | 17 +++++++++++------ 1 file 
changed, 11 insertions(+), 6 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 65a0569..300adc3 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -34,22 +34,28 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, """ db_file = get_db_file(path) db = os.path.join(path, db_file) + + files_to_save = [] + get(path, db_file) unlock(path, db_file) + files_to_save.append(db_file) + conn = sqlite3.connect(db) c = conn.cursor() - files = [] for corr in measurement.keys(): file_in_archive = os.path.join('.', 'archive', ensemble, corr, uuid + '.json.gz') file = os.path.join(path, file_in_archive) - files.append(file) known_meas = {} if not os.path.exists(os.path.join(path, '.', 'archive', ensemble, corr)): os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr)) + files_to_save.append(file_in_archive) else: if os.path.exists(file): - unlock(path, file_in_archive) - known_meas = pj.load_json_dict(file) + if file not in files_to_save: + unlock(path, file_in_archive) + files_to_save.append(file_in_archive) + known_meas = pj.load_json_dict(file, verbose=False) if code == "sfcf": parameters = sfcf.read_param(path, uuid, parameter_file) pars = {} @@ -98,9 +104,8 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() pj.dump_dict_to_json(known_meas, file) - files.append(os.path.join(path, db_file)) conn.close() - save(path, message="Add measurements to database", files=files) + save(path, message="Add measurements to database", files=files_to_save) return From 52f6b0f53c558ad86635111502bee39a525d50fc Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 13:00:30 +0100 Subject: [PATCH 030/111] silence readers --- corrlib/input/sfcf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 621f736..8b6e1a3 100644 --- 
a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -320,10 +320,10 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: if not param['crr'] == []: if names is not None: data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])), - range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, names=names) + range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, silent=True, names=names) else: data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])), - range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True) + range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, silent=True) for key in data_crr.keys(): data[key] = data_crr[key] From 54b42040a9144033ca9b7aec7d7edfe38057e0e6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:07:54 +0100 Subject: [PATCH 031/111] use v6 of astral action --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 1fcb8fe..c82f6d1 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 0e0153bd1d6a96f18b81cfc20398d12c5466c831 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:23:49 +0100 Subject: [PATCH 032/111] update uv setup after runner upate --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml 
b/.github/workflows/pytest.yaml index c82f6d1..0ae798d 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v7.6.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From ca2eb081bb63c3d682d79a11026d6d2a75aad98e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:29:34 +0100 Subject: [PATCH 033/111] older version again --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 0ae798d..6552286 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v7.6.0 + uses: https://github.com/astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 67a9e4ea4b3d15036c1455a62e60c68a95fcd3ce Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:31:03 +0100 Subject: [PATCH 034/111] use 6.4.0 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 6552286..82e7484 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v6.4.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From 53067f7c476be759230001949689c8bd04833b86 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:47:02 +0100 Subject: [PATCH 035/111] use v5 --- .github/workflows/pytest.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 82e7484..9c82795 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,10 +29,9 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6.4.0 + uses: https://github.com/astral-sh/setup-uv@v5 with: python-version: ${{ matrix.python-version }} - enable-cache: true - name: Install corrlib run: uv sync --locked --all-extras --dev --python ${{ matrix.python-version }} - name: Run tests From 4a821006ed3a38574f3f188f35d55bd5862aa558 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:49:35 +0100 Subject: [PATCH 036/111] add setup python --- .github/workflows/pytest.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 9c82795..cd5c0c9 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -28,6 +28,10 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 with: From f05caf572dca93d8f5b0758b9b5837716146500b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:52:57 +0100 Subject: [PATCH 037/111] roll out changes --- .github/workflows/mypy.yaml | 7 ++++--- .github/workflows/pytest.yaml | 4 +--- .github/workflows/ruff.yaml | 8 +++++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index c2a36c4..b8ab802 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -19,11 +19,12 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6.8.0 + - name: Setup python + uses: 
https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - enable-cache: true + - name: Install uv + uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib run: uv sync --locked --all-extras --dev --python "3.12" - name: Run tests diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index cd5c0c9..af3b667 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,13 +29,11 @@ jobs: with: show-progress: true - name: Setup python - uses: actions/setup-python@v5 + uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - with: - python-version: ${{ matrix.python-version }} - name: Install corrlib run: uv sync --locked --all-extras --dev --python ${{ matrix.python-version }} - name: Run tests diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 4de4b0b..778743b 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -20,10 +20,12 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - - name: Install uv - uses: astral-sh/setup-uv@v7 + - name: Setup python + uses: https://github.com/actions/setup-python@v5 with: - enable-cache: true + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib run: uv sync --locked --all-extras --dev --python "3.12" - name: Run tests From 0c01d18ecbfac610ef650659f9ebbcf8d352c605 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:56:31 +0100 Subject: [PATCH 038/111] use python 3.12 for mypy and ruff --- .github/workflows/mypy.yaml | 2 +- .github/workflows/ruff.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index b8ab802..4781688 100644 --- a/.github/workflows/mypy.yaml 
+++ b/.github/workflows/mypy.yaml @@ -22,7 +22,7 @@ jobs: - name: Setup python uses: https://github.com/actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: "3.12" - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 778743b..e0db1b0 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -23,7 +23,7 @@ jobs: - name: Setup python uses: https://github.com/actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: "3.12" - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib From 4853c0e414959973721ef0d5a849f6214e950502 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:58:33 +0100 Subject: [PATCH 039/111] fix type error for now --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 300adc3..2a8c986 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union(Any, None)) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
From 7ce9742ed562c0f9924ca1ea710c85f7bcec9eaa Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:59:45 +0100 Subject: [PATCH 040/111] fix invalid escape in docs --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index 21063ec..022a3f5 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -119,7 +119,7 @@ def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: - """ + r""" Filter method for the Database entries holding SFCF calculations. Parameters From d302ae7e0d40397ba90bef4015a7c0f718237031 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:01:59 +0100 Subject: [PATCH 041/111] fix typo in type annotations --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 2a8c986..2f08052 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union(Any, None)) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[Any, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
From bd581c6c126bc7ada966e754b2a25cc84fb941b2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:07:57 +0100 Subject: [PATCH 042/111] set up git --- .github/workflows/pytest.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index af3b667..83e88ae 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -20,6 +20,10 @@ jobs: env: UV_CACHE_DIR: /tmp/.uv-cache steps: + - name: Setup git + run: | + git config --global user.email "tester@example.com" + git config --global user.name "Tester" - name: Install git-annex run: | sudo apt-get update From c6f3603fbf36f93d9201fc538de3255c505a0629 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:12:57 +0100 Subject: [PATCH 043/111] Throw errors when parmeter file is not set --- corrlib/meas_io.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 2f08052..8e5855d 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[Any, None]) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
@@ -57,7 +57,10 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, files_to_save.append(file_in_archive) known_meas = pj.load_json_dict(file, verbose=False) if code == "sfcf": - parameters = sfcf.read_param(path, uuid, parameter_file) + if parameter_file is not None: + parameters = sfcf.read_param(path, uuid, parameter_file) + else: + raise Exception("Need parameter file for this code!") pars = {} subkeys = list(measurement[corr].keys()) for subkey in subkeys: @@ -66,7 +69,10 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, elif code == "openQCD": ms_type = list(measurement.keys())[0] if ms_type == 'ms1': - parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + if parameter_file is not None: + parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + else: + raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): From 6cfa51f878c358730effaa58174daba7fe290818 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:42:55 +0100 Subject: [PATCH 044/111] setup local cache --- .github/workflows/mypy.yaml | 5 ++++- .github/workflows/pytest.yaml | 6 +++++- .github/workflows/ruff.yaml | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 4781688..8e276d4 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -9,7 +9,10 @@ jobs: mypy: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 83e88ae..3411646 100644 --- a/.github/workflows/pytest.yaml +++ 
b/.github/workflows/pytest.yaml @@ -18,7 +18,10 @@ jobs: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Setup git run: | @@ -36,6 +39,7 @@ jobs: uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index e0db1b0..db42edb 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -10,7 +10,10 @@ jobs: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | From 94b677262a239f483f673fae2f7abf3e0f3e707c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:45:01 +0100 Subject: [PATCH 045/111] remove cache envs --- .github/workflows/mypy.yaml | 3 --- .github/workflows/pytest.yaml | 3 --- .github/workflows/ruff.yaml | 3 --- 3 files changed, 9 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 8e276d4..fdb5bee 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -10,9 +10,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 3411646..286afbe 100644 --- 
a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -19,9 +19,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Setup git run: | diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index db42edb..a1cb972 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -11,9 +11,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | From c2296f00ee84eef8fabe20d99aae52c890736ea4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:47:36 +0100 Subject: [PATCH 046/111] remove uv cache --- .github/workflows/mypy.yaml | 2 -- .github/workflows/pytest.yaml | 2 -- .github/workflows/ruff.yaml | 2 -- 3 files changed, 6 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index fdb5bee..fbd51ec 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -8,8 +8,6 @@ on: jobs: mypy: runs-on: ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 286afbe..da44258 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -17,8 +17,6 @@ jobs: - "3.14" runs-on: ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Setup git run: | diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index a1cb972..1da1225 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -9,8 +9,6 @@ jobs: ruff: runs-on: 
ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Install git-annex run: | From a5d6b978ea5d4b642828f220cd2c2b07772af089 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 17:25:44 +0100 Subject: [PATCH 047/111] remove pip cache --- .github/workflows/pytest.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index da44258..b1a4d94 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -34,7 +34,6 @@ jobs: uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib From b65ee83698df8f8f292670e4aea4565f0595150d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 23:37:40 +0100 Subject: [PATCH 048/111] fix list test --- tests/cli_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cli_test.py b/tests/cli_test.py index a6b0bd7..d4a4045 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -37,7 +37,7 @@ def test_init_db(tmp_path): table_names = [table[0] for table in tables] for expected_table in expected_tables: assert expected_table in table_names - + cursor.execute("SELECT * FROM projects;") projects = cursor.fetchall() assert len(projects) == 0 @@ -60,7 +60,7 @@ def test_init_db(tmp_path): project_column_names = [col[1] for col in project_columns] for expected_col in expected_project_columns: assert expected_col in project_column_names - + cursor.execute("PRAGMA table_info('backlogs');") backlog_columns = cursor.fetchall() expected_backlog_columns = [ @@ -85,7 +85,7 @@ def test_list(tmp_path): dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 - result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "ensembles"]) + result = 
runner.invoke(app, ["lister", "--dataset", str(dataset_path), "ensembles"]) assert result.exit_code == 0 - result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "projects"]) + result = runner.invoke(app, ["lister", "--dataset", str(dataset_path), "projects"]) assert result.exit_code == 0 From 776e4a3d8d131d3f724eae1d266ea3df374d3340 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 12:59:33 +0100 Subject: [PATCH 049/111] add further tests for tools --- tests/tools_test.py | 62 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/tests/tools_test.py b/tests/tools_test.py index ee76f1c..60a5a4a 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,31 +1,79 @@ - - from corrlib import tools as tl +from configparser import ConfigParser +import os -def test_m2k(): +def test_m2k() -> None: for m in [0.1, 0.5, 1.0]: expected_k = 1 / (2 * m + 8) assert tl.m2k(m) == expected_k -def test_k2m(): +def test_k2m() -> None: for m in [0.1, 0.5, 1.0]: assert tl.k2m(m) == (1/(2*m))-4 -def test_k2m_m2k(): +def test_k2m_m2k() -> None: for m in [0.1, 0.5, 1.0]: k = tl.m2k(m) m_converted = tl.k2m(k) assert abs(m - m_converted) < 1e-9 -def test_str2list(): +def test_str2list() -> None: assert tl.str2list("a,b,c") == ["a", "b", "c"] assert tl.str2list("1,2,3") == ["1", "2", "3"] -def test_list2str(): +def test_list2str() -> None: assert tl.list2str(["a", "b", "c"]) == "a,b,c" assert tl.list2str(["1", "2", "3"]) == "1,2,3" + + +def test_set_config(tmp_path: str) -> None: + section = "core" + option = "test_option" + value = "test_value" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option', fallback="not the value") == "test_value" + # now, a config file is already present + section = "core" + option = "test_option2" + 
value = "test_value2" + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option2', fallback="not the value") == "test_value2" + # update option 2 + section = "core" + option = "test_option2" + value = "test_value3" + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" + + +def test_get_db_file(tmp_path: str) -> None: + section = "paths" + option = "db" + value = "test_value" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + assert tl.get_db_file(tmp_path) == "test_value" + + +def test_cache_enabled(tmp_path: str) -> None: + section = "core" + option = "cached" + value = "True" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + assert tl.get_db_file(tmp_path) From 7e76966d5f7ba9d4b7cfcd6e95e5988c1de21c35 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 12:59:59 +0100 Subject: [PATCH 050/111] replace config file name with var --- corrlib/tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 118b094..26cbf0a 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -88,7 +88,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: value: Any The value we set the option to. 
""" - config_path = os.path.join(path, '.corrlib') + config_path = os.path.join(path, CONFIG_FILENAME) config = ConfigParser() if os.path.exists(config_path): config.read(config_path) From 6bb48f151c32ef09bc5119dbbc7aedc0816a794e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:01:58 +0100 Subject: [PATCH 051/111] add types --- tests/cli_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/cli_test.py b/tests/cli_test.py index d4a4045..cba0a10 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -2,18 +2,19 @@ from typer.testing import CliRunner from corrlib.cli import app import os import sqlite3 as sql +from pathlib import Path runner = CliRunner() -def test_version(): +def test_version() -> None: result = runner.invoke(app, ["--version"]) assert result.exit_code == 0 assert "corrlib" in result.output -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 @@ -21,7 +22,7 @@ def test_init_folders(tmp_path): assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 @@ -81,7 +82,7 @@ def test_init_db(tmp_path): assert expected_col in backlog_column_names -def test_list(tmp_path): +def test_list(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 From 92f307b83ac794f181d4a855f922f3fa5f9532c8 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:05:33 +0100 Subject: [PATCH 052/111] use Path in type annotations --- tests/import_project_test.py | 2 +- tests/sfcf_in_test.py | 2 +- 
tests/test_initialization.py | 9 +++++---- tests/tools_test.py | 7 ++++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/import_project_test.py b/tests/import_project_test.py index 2dea06f..685d2cf 100644 --- a/tests/import_project_test.py +++ b/tests/import_project_test.py @@ -1,7 +1,7 @@ import corrlib.toml as t -def test_toml_check_measurement_data(): +def test_toml_check_measurement_data() -> None: measurements = { "a": { diff --git a/tests/sfcf_in_test.py b/tests/sfcf_in_test.py index 72921e7..5e4ff83 100644 --- a/tests/sfcf_in_test.py +++ b/tests/sfcf_in_test.py @@ -1,7 +1,7 @@ import corrlib.input.sfcf as input import json -def test_get_specs(): +def test_get_specs() -> None: parameters = { 'crr': [ 'f_P', 'f_A' diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 1ea0ece..9284c82 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -1,22 +1,23 @@ import corrlib.initialization as init import os import sqlite3 as sql +from pathlib import Path -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path)) assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_folders_no_tracker(tmp_path): +def test_init_folders_no_tracker(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_config(tmp_path): +def test_init_config(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") config_path = dataset_path / ".corrlib" @@ -34,7 +35,7 @@ def test_init_config(tmp_path): assert config.get("paths", "import_scripts_path") == "import_scripts" -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = 
tmp_path / "test_dataset" init.create(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) diff --git a/tests/tools_test.py b/tests/tools_test.py index 60a5a4a..0399be0 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,6 +1,7 @@ from corrlib import tools as tl from configparser import ConfigParser import os +from pathlib import Path def test_m2k() -> None: @@ -31,7 +32,7 @@ def test_list2str() -> None: assert tl.list2str(["1", "2", "3"]) == "1,2,3" -def test_set_config(tmp_path: str) -> None: +def test_set_config(tmp_path: Path) -> None: section = "core" option = "test_option" value = "test_value" @@ -61,7 +62,7 @@ def test_set_config(tmp_path: str) -> None: assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" -def test_get_db_file(tmp_path: str) -> None: +def test_get_db_file(tmp_path: Path) -> None: section = "paths" option = "db" value = "test_value" @@ -70,7 +71,7 @@ def test_get_db_file(tmp_path: str) -> None: assert tl.get_db_file(tmp_path) == "test_value" -def test_cache_enabled(tmp_path: str) -> None: +def test_cache_enabled(tmp_path: Path) -> None: section = "core" option = "cached" value = "True" From 97e30fa27d5eed7db5133899ba9efafeef7c7c6d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:06:12 +0100 Subject: [PATCH 053/111] use Path in type annotations part 2 --- corrlib/tools.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 26cbf0a..72112c5 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,6 +1,7 @@ import os from configparser import ConfigParser from typing import Any +from pathlib import Path CONFIG_FILENAME = ".corrlib" cached: bool = True @@ -73,7 +74,7 @@ def k2m(k: float) -> float: return (1/(2*k))-4 -def set_config(path: str, section: str, option: str, value: Any) -> None: +def set_config(path: Path, section: str, option: str, value: Any) -> None: """ Set configuration 
parameters for the library. @@ -100,7 +101,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: return -def get_db_file(path: str) -> str: +def get_db_file(path: Path) -> str: """ Get the database file associated with the library at the given path. @@ -122,7 +123,7 @@ def get_db_file(path: str) -> str: return db_file -def cache_enabled(path: str) -> bool: +def cache_enabled(path: Path) -> bool: """ Check, whether the library is cached. Fallback is true. From 110ddaf3a1ad141c210c0d1d23ab85b85a930af7 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 15:21:34 +0100 Subject: [PATCH 054/111] add error messages --- corrlib/tools.py | 6 ++++++ tests/tools_test.py | 11 ++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 72112c5..727ed30 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -119,6 +119,8 @@ def get_db_file(path: Path) -> str: config = ConfigParser() if os.path.exists(config_path): config.read(config_path) + else: + raise FileNotFoundError("Configuration file not found.") db_file = config.get('paths', 'db', fallback='backlogger.db') return db_file @@ -142,6 +144,10 @@ def cache_enabled(path: Path) -> bool: config = ConfigParser() if os.path.exists(config_path): config.read(config_path) + else: + raise FileNotFoundError("Configuration file not found.") cached_str = config.get('core', 'cached', fallback='True') + if cached_str not in ['True', 'False']: + raise ValueError(f"String {cached_str} is not a valid option, only True and False are allowed!") cached_bool = cached_str == ('True') return cached_bool diff --git a/tests/tools_test.py b/tests/tools_test.py index 0399be0..9be88b4 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -2,6 +2,7 @@ from corrlib import tools as tl from configparser import ConfigParser import os from pathlib import Path +import pytest def test_m2k() -> None: @@ -74,7 +75,11 @@ def test_get_db_file(tmp_path: 
Path) -> None: def test_cache_enabled(tmp_path: Path) -> None: section = "core" option = "cached" - value = "True" # config is not yet available - tl.set_config(tmp_path, section, option, value) - assert tl.get_db_file(tmp_path) + tl.set_config(tmp_path, section, option, "True") + assert tl.cache_enabled(tmp_path) + tl.set_config(tmp_path, section, option, "False") + assert not tl.cache_enabled(tmp_path) + tl.set_config(tmp_path, section, option, "lalala") + with pytest.raises(ValueError) as e_info: + tl.cache_enabled(tmp_path) From 8162758ceca1985ad6818cd803426ff0e5b207cf Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 16:15:55 +0100 Subject: [PATCH 055/111] use pathlib.Path for directories and files --- corrlib/cli.py | 41 ++++++++++++------------ corrlib/find.py | 17 +++++----- corrlib/git_tools.py | 19 ++++++------ corrlib/initialization.py | 23 +++++++------- corrlib/input/openQCD.py | 11 ++++--- corrlib/input/sfcf.py | 7 +++-- corrlib/main.py | 24 +++++++-------- corrlib/meas_io.py | 60 +++++++++++++++++++----------------- corrlib/toml.py | 23 +++++++------- corrlib/tools.py | 4 +-- corrlib/tracker.py | 17 +++++----- tests/test_initialization.py | 8 ++--- tests/tools_test.py | 8 ++--- 13 files changed, 137 insertions(+), 125 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 4e1b65e..b28692a 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -11,6 +11,7 @@ from .meas_io import load_record as mio_load_record import os from pyerrors import Corr from importlib.metadata import version +from pathlib import Path app = typer.Typer() @@ -24,8 +25,8 @@ def _version_callback(value: bool) -> None: @app.command() def update( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -40,8 +41,8 @@ def update( @app.command() def lister( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -52,8 +53,8 @@ 
def lister( """ if entities in ['ensembles', 'Ensembles','ENSEMBLES']: print("Ensembles:") - for item in os.listdir(path + "/archive"): - if os.path.isdir(os.path.join(path + "/archive", item)): + for item in os.listdir(path / "archive"): + if os.path.isdir(path / "archive" / item): print(item) elif entities == 'projects': results = list_projects(path) @@ -71,8 +72,8 @@ def lister( @app.command() def alias_add( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -89,8 +90,8 @@ def alias_add( @app.command() def find( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -116,8 +117,8 @@ def find( @app.command() def stat( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -136,8 +137,8 @@ def stat( @app.command() def importer( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -159,8 +160,8 @@ def importer( @app.command() def reimporter( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -183,8 +184,8 @@ def reimporter( @app.command() def init( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -203,8 +204,8 @@ def init( @app.command() def drop_cache( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), diff --git a/corrlib/find.py b/corrlib/find.py index 022a3f5..faef5db 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -7,9 +7,10 @@ from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get from typing import Any, Optional +from pathlib import Path -def _project_lookup_by_alias(db: str, alias: str) -> str: +def 
_project_lookup_by_alias(db: Path, alias: str) -> str: """ Lookup a projects UUID by its (human-readable) alias. @@ -37,7 +38,7 @@ def _project_lookup_by_alias(db: str, alias: str) -> str: return str(results[0][0]) -def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: +def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: """ Return the project information available in the database by UUID. @@ -61,7 +62,7 @@ def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: return results -def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, +def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: """ Look up a correlator record in the database by the data given to the method. 
@@ -228,10 +229,10 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results.drop(drops) -def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, +def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) @@ -246,7 +247,7 @@ def find_record(path: str, ensemble: str, correlator_name: str, code: str, proje return results.reset_index() -def find_project(path: str, name: str) -> str: +def find_project(path: Path, name: str) -> str: """ Find a project by it's human readable name. @@ -264,10 +265,10 @@ def find_project(path: str, name: str) -> str: """ db_file = get_db_file(path) get(path, db_file) - return _project_lookup_by_alias(os.path.join(path, db_file), name) + return _project_lookup_by_alias(path / db_file, name) -def list_projects(path: str) -> list[tuple[str, str]]: +def list_projects(path: Path) -> list[tuple[str, str]]: """ List all projects known to the library. diff --git a/corrlib/git_tools.py b/corrlib/git_tools.py index c6e7522..d77f109 100644 --- a/corrlib/git_tools.py +++ b/corrlib/git_tools.py @@ -1,27 +1,28 @@ import os from .tracker import save import git +from pathlib import Path GITMODULES_FILE = '.gitmodules' -def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: +def move_submodule(repo_path: Path, old_path: Path, new_path: Path) -> None: """ Move a submodule to a new location. 
Parameters ---------- - repo_path: str + repo_path: Path Path to the repository. - old_path: str + old_path: Path The old path of the module. - new_path: str + new_path: Path The new path of the module. """ - os.rename(os.path.join(repo_path, old_path), os.path.join(repo_path, new_path)) + os.rename(repo_path / old_path, repo_path / new_path) - gitmodules_file_path = os.path.join(repo_path, GITMODULES_FILE) + gitmodules_file_path = repo_path / GITMODULES_FILE # update paths in .gitmodules with open(gitmodules_file_path, 'r') as file: @@ -29,8 +30,8 @@ def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: updated_lines = [] for line in lines: - if old_path in line: - line = line.replace(old_path, new_path) + if str(old_path) in line: + line = line.replace(str(old_path), str(new_path)) updated_lines.append(line) with open(gitmodules_file_path, 'w') as file: @@ -40,6 +41,6 @@ def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: repo = git.Repo(repo_path) repo.git.add('.gitmodules') # save new state of the dataset - save(repo_path, message=f"Move module from {old_path} to {new_path}", files=['.gitmodules', repo_path]) + save(repo_path, message=f"Move module from {old_path} to {new_path}", files=[Path('.gitmodules'), repo_path]) return diff --git a/corrlib/initialization.py b/corrlib/initialization.py index bb71db6..c06a201 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -2,9 +2,10 @@ from configparser import ConfigParser import sqlite3 import os from .tracker import save, init +from pathlib import Path -def _create_db(db: str) -> None: +def _create_db(db: Path) -> None: """ Create the database file and the table. @@ -40,7 +41,7 @@ def _create_db(db: str) -> None: return -def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: +def _create_config(path: Path, tracker: str, cached: bool) -> ConfigParser: """ Create the config file construction for backlogger. 
@@ -75,7 +76,7 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: return config -def _write_config(path: str, config: ConfigParser) -> None: +def _write_config(path: Path, config: ConfigParser) -> None: """ Write the config file to disk. @@ -91,7 +92,7 @@ def _write_config(path: str, config: ConfigParser) -> None: return -def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None: +def create(path: Path, tracker: str = 'datalad', cached: bool = True) -> None: """ Create folder of backlogs. @@ -107,13 +108,13 @@ def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None: config = _create_config(path, tracker, cached) init(path, tracker) _write_config(path, config) - _create_db(os.path.join(path, config['paths']['db'])) - os.chmod(os.path.join(path, config['paths']['db']), 0o666) - os.makedirs(os.path.join(path, config['paths']['projects_path'])) - os.makedirs(os.path.join(path, config['paths']['archive_path'])) - os.makedirs(os.path.join(path, config['paths']['toml_imports_path'])) - os.makedirs(os.path.join(path, config['paths']['import_scripts_path'], 'template.py')) - with open(os.path.join(path, ".gitignore"), "w") as fp: + _create_db(path / config['paths']['db']) + os.chmod(path / config['paths']['db'], 0o666) + os.makedirs(path / config['paths']['projects_path']) + os.makedirs(path / config['paths']['archive_path']) + os.makedirs(path / config['paths']['toml_imports_path']) + os.makedirs(path / config['paths']['import_scripts_path'] / 'template.py') + with open(path / ".gitignore", "w") as fp: fp.write(".cache") fp.close() save(path, message="Initialized correlator library") diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 71ebec6..a3bce6f 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -3,9 +3,10 @@ import datalad.api as dl import os import fnmatch from typing import Any, Optional +from pathlib import Path -def read_ms1_param(path: str, project: str, 
file_in_project: str) -> dict[str, Any]: +def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms1 measurements from a parameter file in the project. @@ -69,7 +70,7 @@ def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, A return param -def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: +def read_ms3_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms3 measurements from a parameter file in the project. @@ -103,7 +104,7 @@ def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, A return param -def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def read_rwms(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Read reweighting factor measurements from the project. @@ -160,7 +161,7 @@ def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any return rw_dict -def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def extract_t0(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Extract t0 measurements from the project. 
@@ -234,7 +235,7 @@ def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, An return t0_dict -def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Extract t1 measurements from the project. diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 8b6e1a3..acd8261 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -4,6 +4,7 @@ import json import os from typing import Any from fnmatch import fnmatch +from pathlib import Path bi_corrs: list[str] = ["f_P", "fP", "f_p", @@ -80,7 +81,7 @@ for c in bib_corrs: corr_types[c] = 'bib' -def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: +def read_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters from the sfcf file. 
@@ -96,7 +97,7 @@ def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: """ - file = path + "/projects/" + project + '/' + file_in_project + file = path / "projects" / project / file_in_project dl.get(file, dataset=path) with open(file, 'r') as f: lines = f.readlines() @@ -257,7 +258,7 @@ def get_specs(key: str, parameters: dict[str, Any], sep: str = '/') -> str: return s -def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: dict[str, Any], version: str = '1.0c', cfg_seperator: str = 'n', sep: str = '/', **kwargs: Any) -> dict[str, Any]: +def read_data(path: Path, project: str, dir_in_project: str, prefix: str, param: dict[str, Any], version: str = '1.0c', cfg_seperator: str = 'n', sep: str = '/', **kwargs: Any) -> dict[str, Any]: """ Extract the data from the sfcf file. diff --git a/corrlib/main.py b/corrlib/main.py index 88b99b3..831b69d 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -8,9 +8,10 @@ from .find import _project_lookup_by_id from .tools import list2str, str2list, get_db_file from .tracker import get, save, unlock, clone, drop from typing import Union, Optional +from pathlib import Path -def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Union[list[str], None]=None, aliases: Union[list[str], None]=None, code: Union[str, None]=None) -> None: +def create_project(path: Path, uuid: str, owner: Union[str, None]=None, tags: Union[list[str], None]=None, aliases: Union[list[str], None]=None, code: Union[str, None]=None) -> None: """ Create a new project entry in the database. @@ -48,7 +49,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni return -def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] = None) -> None: +def update_project_data(path: Path, uuid: str, prop: str, value: Union[str, None] = None) -> None: """ Update/Edit a project entry in the database. Thin wrapper around sql3 call. 
@@ -74,9 +75,9 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] return -def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: +def update_aliases(path: Path, uuid: str, aliases: list[str]) -> None: db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file get(path, db_file) known_data = _project_lookup_by_id(db, uuid)[0] known_aliases = known_data[1] @@ -102,7 +103,7 @@ def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: return -def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: +def import_project(path: Path, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: """ Import a datalad dataset into the backlogger. @@ -134,14 +135,14 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti uuid = str(conf.get("datalad.dataset.id")) if not uuid: raise ValueError("The dataset does not have a uuid!") - if not os.path.exists(path + "/projects/" + uuid): + if not os.path.exists(path / "projects" / uuid): db_file = get_db_file(path) get(path, db_file) unlock(path, db_file) create_project(path, uuid, owner, tags, aliases, code) - move_submodule(path, 'projects/tmp', 'projects/' + uuid) - os.mkdir(path + '/import_scripts/' + uuid) - save(path, message="Import project from " + url, files=['projects/' + uuid, db_file]) + move_submodule(path, Path('projects/tmp'), Path('projects') / uuid) + os.mkdir(path / 'import_scripts' / uuid) + save(path, message="Import project from " + url, files=[Path(f'projects/{uuid}'), db_file]) else: dl.drop(tmp_path, reckless='kill') shutil.rmtree(tmp_path) @@ -156,7 +157,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti return uuid -def 
drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: +def drop_project_data(path: Path, uuid: str, path_in_project: str = "") -> None: """ Drop (parts of) a project to free up diskspace @@ -169,6 +170,5 @@ def drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: path_pn_project: str, optional If set, only the given path within the project is dropped. """ - drop(path + "/projects/" + uuid + "/" + path_in_project) + drop(path / "projects" / uuid / path_in_project) return - diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 8e5855d..be80b6f 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -10,9 +10,13 @@ from .tools import get_db_file, cache_enabled from .tracker import get, save, unlock import shutil from typing import Any +from pathlib import Path -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: +CACHE_DIR = ".cache" + + +def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. @@ -33,7 +37,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, The parameter file used for the measurement. 
""" db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file files_to_save = [] @@ -44,11 +48,11 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, conn = sqlite3.connect(db) c = conn.cursor() for corr in measurement.keys(): - file_in_archive = os.path.join('.', 'archive', ensemble, corr, uuid + '.json.gz') - file = os.path.join(path, file_in_archive) + file_in_archive = Path('.') / 'archive' / ensemble / corr / str(uuid + '.json.gz') + file = path / file_in_archive known_meas = {} - if not os.path.exists(os.path.join(path, '.', 'archive', ensemble, corr)): - os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr)) + if not os.path.exists(path / 'archive' / ensemble / corr): + os.makedirs(path / 'archive' / ensemble / corr) files_to_save.append(file_in_archive) else: if os.path.exists(file): @@ -99,7 +103,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, pars[subkey] = json.dumps(parameters) for subkey in subkeys: parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + parHash + meas_path = str(file_in_archive) + "::" + parHash known_meas[parHash] = measurement[corr][subkey] @@ -115,7 +119,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, return -def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: +def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]: """ Load a list of records by their paths. @@ -134,7 +138,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: return load_records(path, [meas_path])[0] -def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: +def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. 
@@ -162,11 +166,11 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { returned_data: list[Any] = [] for file in needed_data.keys(): for key in list(needed_data[file]): - if os.path.exists(cache_path(path, file, key) + ".p"): - returned_data.append(load_object(cache_path(path, file, key) + ".p")) + if os.path.exists(str(cache_path(path, file, key)) + ".p"): + returned_data.append(load_object(str(cache_path(path, file, key)) + ".p")) else: if file not in preloaded: - preloaded[file] = preload(path, file) + preloaded[file] = preload(path, Path(file)) returned_data.append(preloaded[file][key]) if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): @@ -175,7 +179,7 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { return returned_data -def cache_dir(path: str, file: str) -> str: +def cache_dir(path: Path, file: str) -> Path: """ Returns the directory corresponding to the cache for the given file. @@ -190,14 +194,14 @@ def cache_dir(path: str, file: str) -> str: cache_path: str The path holding the cached data for the given file. """ - cache_path_list = [path] - cache_path_list.append(".cache") - cache_path_list.extend(file.split("/")[1:]) - cache_path = "/".join(cache_path_list) + cache_path_list = file.split("/")[1:] + cache_path = path / CACHE_DIR + for directory in cache_path_list: + cache_path /= directory return cache_path -def cache_path(path: str, file: str, key: str) -> str: +def cache_path(path: Path, file: str, key: str) -> Path: """ Parameters ---------- @@ -213,11 +217,11 @@ def cache_path(path: str, file: str, key: str) -> str: cache_path: str The path at which the measurement of the given file and key is cached. 
""" - cache_path = os.path.join(cache_dir(path, file), key) + cache_path = cache_dir(path, file) / key return cache_path -def preload(path: str, file: str) -> dict[str, Any]: +def preload(path: Path, file: Path) -> dict[str, Any]: """ Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method. @@ -234,12 +238,12 @@ def preload(path: str, file: str) -> dict[str, Any]: The data read from the file. """ get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(os.path.join(path, file)) + filedict: dict[str, Any] = pj.load_json_dict(path / file) print("> read file") return filedict -def drop_record(path: str, meas_path: str) -> None: +def drop_record(path: Path, meas_path: str) -> None: """ Drop a record by it's path. @@ -251,9 +255,9 @@ def drop_record(path: str, meas_path: str) -> None: The measurement path as noted in the database. """ file_in_archive = meas_path.split("::")[0] - file = os.path.join(path, file_in_archive) + file = path / file_in_archive db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file get(path, db_file) sub_key = meas_path.split("::")[1] unlock(path, db_file) @@ -268,7 +272,7 @@ def drop_record(path: str, meas_path: str) -> None: known_meas = pj.load_json_dict(file) if sub_key in known_meas: del known_meas[sub_key] - unlock(path, file_in_archive) + unlock(path, Path(file_in_archive)) pj.dump_dict_to_json(known_meas, file) save(path, message="Drop measurements to database", files=[db, file]) return @@ -276,7 +280,7 @@ def drop_record(path: str, meas_path: str) -> None: raise ValueError("This measurement does not exist as a file!") -def drop_cache(path: str) -> None: +def drop_cache(path: Path) -> None: """ Drop the cache directory of the library. @@ -285,7 +289,7 @@ def drop_cache(path: str) -> None: path: str The path of the library. 
""" - cache_dir = os.path.join(path, ".cache") + cache_dir = path / ".cache" for f in os.listdir(cache_dir): - shutil.rmtree(os.path.join(cache_dir, f)) + shutil.rmtree(cache_dir / f) return diff --git a/corrlib/toml.py b/corrlib/toml.py index feafaf6..add3739 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -19,6 +19,7 @@ from .meas_io import write_measurement import os from .input.implementations import codes as known_codes from typing import Any +from pathlib import Path def replace_string(string: str, name: str, val: str) -> str: @@ -126,7 +127,7 @@ def check_measurement_data(measurements: dict[str, dict[str, str]], code: str) - return -def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None: +def import_tomls(path: Path, files: list[str], copy_files: bool=True) -> None: """ Import multiple toml files. @@ -144,7 +145,7 @@ def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None: return -def import_toml(path: str, file: str, copy_file: bool=True) -> None: +def import_toml(path: Path, file: str, copy_file: bool=True) -> None: """ Import a project decribed by a .toml file. 
@@ -171,7 +172,7 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: aliases = project.get('aliases', []) uuid = project.get('uuid', None) if uuid is not None: - if not os.path.exists(path + "/projects/" + uuid): + if not os.path.exists(path / "projects" / uuid): uuid = import_project(path, project['url'], aliases=aliases) else: update_aliases(path, uuid, aliases) @@ -213,18 +214,18 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) print(mname + " imported.") - if not os.path.exists(os.path.join(path, "toml_imports", uuid)): - os.makedirs(os.path.join(path, "toml_imports", uuid)) + if not os.path.exists(path / "toml_imports" / uuid): + os.makedirs(path / "toml_imports" / uuid) if copy_file: - import_file = os.path.join(path, "toml_imports", uuid, file.split("/")[-1]) + import_file = path / "toml_imports" / uuid / file.split("/")[-1] shutil.copy(file, import_file) - save(path, files=[import_file], message="Import using " + import_file) - print("File copied to " + import_file) + save(path, files=[import_file], message=f"Import using {import_file}") + print(f"File copied to {import_file}") print("Imported project.") return -def reimport_project(path: str, uuid: str) -> None: +def reimport_project(path: Path, uuid: str) -> None: """ Reimport an existing project using the files that are already available for this project. @@ -235,14 +236,14 @@ def reimport_project(path: str, uuid: str) -> None: uuid: str uuid of the project that is to be reimported. 
""" - config_path = "/".join([path, "import_scripts", uuid]) + config_path = path / "import_scripts" / uuid for p, filenames, dirnames in os.walk(config_path): for fname in filenames: import_toml(path, os.path.join(config_path, fname), copy_file=False) return -def update_project(path: str, uuid: str) -> None: +def update_project(path: Path, uuid: str) -> None: """ Update all entries associated with a given project. diff --git a/corrlib/tools.py b/corrlib/tools.py index 727ed30..93f0678 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -101,7 +101,7 @@ def set_config(path: Path, section: str, option: str, value: Any) -> None: return -def get_db_file(path: Path) -> str: +def get_db_file(path: Path) -> Path: """ Get the database file associated with the library at the given path. @@ -121,7 +121,7 @@ def get_db_file(path: Path) -> str: config.read(config_path) else: raise FileNotFoundError("Configuration file not found.") - db_file = config.get('paths', 'db', fallback='backlogger.db') + db_file = Path(config.get('paths', 'db', fallback='backlogger.db')) return db_file diff --git a/corrlib/tracker.py b/corrlib/tracker.py index e535b03..a6e9bf4 100644 --- a/corrlib/tracker.py +++ b/corrlib/tracker.py @@ -4,9 +4,10 @@ import datalad.api as dl from typing import Optional import shutil from .tools import get_db_file +from pathlib import Path -def get_tracker(path: str) -> str: +def get_tracker(path: Path) -> str: """ Get the tracker used in the dataset located at path. @@ -30,7 +31,7 @@ def get_tracker(path: str) -> str: return tracker -def get(path: str, file: str) -> None: +def get(path: Path, file: Path) -> None: """ Wrapper function to get a file from the dataset located at path with the specified tracker. 
@@ -56,7 +57,7 @@ def get(path: str, file: str) -> None: return -def save(path: str, message: str, files: Optional[list[str]]=None) -> None: +def save(path: Path, message: str, files: Optional[list[Path]]=None) -> None: """ Wrapper function to save a file to the dataset located at path with the specified tracker. @@ -72,7 +73,7 @@ def save(path: str, message: str, files: Optional[list[str]]=None) -> None: tracker = get_tracker(path) if tracker == 'datalad': if files is not None: - files = [os.path.join(path, f) for f in files] + files = [path / f for f in files] dl.save(files, message=message, dataset=path) elif tracker == 'None': Warning("Tracker 'None' does not implement save.") @@ -81,7 +82,7 @@ def save(path: str, message: str, files: Optional[list[str]]=None) -> None: raise ValueError(f"Tracker {tracker} is not supported.") -def init(path: str, tracker: str='datalad') -> None: +def init(path: Path, tracker: str='datalad') -> None: """ Initialize a dataset at the specified path with the specified tracker. @@ -101,7 +102,7 @@ def init(path: str, tracker: str='datalad') -> None: return -def unlock(path: str, file: str) -> None: +def unlock(path: Path, file: Path) -> None: """ Wrapper function to unlock a file in the dataset located at path with the specified tracker. @@ -123,7 +124,7 @@ def unlock(path: str, file: str) -> None: return -def clone(path: str, source: str, target: str) -> None: +def clone(path: Path, source: str, target: str) -> None: """ Wrapper function to clone a dataset from source to target with the specified tracker. Parameters @@ -147,7 +148,7 @@ def clone(path: str, source: str, target: str) -> None: return -def drop(path: str, reckless: Optional[str]=None) -> None: +def drop(path: Path, reckless: Optional[str]=None) -> None: """ Wrapper function to drop data from a dataset located at path with the specified tracker. 
diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 9284c82..d78fb15 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -5,21 +5,21 @@ from pathlib import Path def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path)) + init.create(dataset_path) assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) def test_init_folders_no_tracker(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path), tracker="None") + init.create(dataset_path, tracker="None") assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) def test_init_config(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path), tracker="None") + init.create(dataset_path, tracker="None") config_path = dataset_path / ".corrlib" assert os.path.exists(str(config_path)) from configparser import ConfigParser @@ -37,7 +37,7 @@ def test_init_config(tmp_path: Path) -> None: def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path)) + init.create(dataset_path) assert os.path.exists(str(dataset_path / "backlogger.db")) conn = sql.connect(str(dataset_path / "backlogger.db")) cursor = conn.cursor() diff --git a/tests/tools_test.py b/tests/tools_test.py index 9be88b4..a5427f3 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -39,7 +39,7 @@ def test_set_config(tmp_path: Path) -> None: value = "test_value" # config is not yet available tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option', fallback="not the value") == "test_value" @@ -48,7 +48,7 @@ def test_set_config(tmp_path: Path) -> None: option = 
"test_option2" value = "test_value2" tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option2', fallback="not the value") == "test_value2" @@ -57,7 +57,7 @@ def test_set_config(tmp_path: Path) -> None: option = "test_option2" value = "test_value3" tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" @@ -69,7 +69,7 @@ def test_get_db_file(tmp_path: Path) -> None: value = "test_value" # config is not yet available tl.set_config(tmp_path, section, option, value) - assert tl.get_db_file(tmp_path) == "test_value" + assert tl.get_db_file(tmp_path) == Path("test_value") def test_cache_enabled(tmp_path: Path) -> None: From 480c04e0692c045d5129b8822530d11443fe5b9a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 16:18:32 +0100 Subject: [PATCH 056/111] lint --- tests/sfcf_in_test.py | 2 +- tests/tools_test.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/sfcf_in_test.py b/tests/sfcf_in_test.py index 5e4ff83..7ebc94a 100644 --- a/tests/sfcf_in_test.py +++ b/tests/sfcf_in_test.py @@ -26,4 +26,4 @@ def test_get_specs() -> None: key = "f_P/q1 q2/1/0/0" specs = json.loads(input.get_specs(key, parameters)) assert specs['quarks'] == ['a', 'b'] - assert specs['wf1'][0] == [1, [0, 0]] \ No newline at end of file + assert specs['wf1'][0] == [1, [0, 0]] diff --git a/tests/tools_test.py b/tests/tools_test.py index a5427f3..541674f 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,6 +1,5 @@ from corrlib import tools as tl from configparser import ConfigParser -import os from pathlib import Path import pytest @@ -81,5 +80,5 @@ def 
test_cache_enabled(tmp_path: Path) -> None: tl.set_config(tmp_path, section, option, "False") assert not tl.cache_enabled(tmp_path) tl.set_config(tmp_path, section, option, "lalala") - with pytest.raises(ValueError) as e_info: + with pytest.raises(ValueError): tl.cache_enabled(tmp_path) From 2396a657b29482060917ecb2a99a13ee70f091ae Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 17:50:38 +0100 Subject: [PATCH 057/111] rename init_tests --- tests/{test_initialization.py => initialization_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_initialization.py => initialization_test.py} (100%) diff --git a/tests/test_initialization.py b/tests/initialization_test.py similarity index 100% rename from tests/test_initialization.py rename to tests/initialization_test.py From a57138dc50f1c1caca90794a14d53b11efa5165b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 18:26:17 +0100 Subject: [PATCH 058/111] add test for project alias lookup --- tests/find_test.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/find_test.py diff --git a/tests/find_test.py b/tests/find_test.py new file mode 100644 index 0000000..b63b246 --- /dev/null +++ b/tests/find_test.py @@ -0,0 +1,33 @@ +import corrlib.find as find +import sqlite3 +from pathlib import Path +import corrlib.initialization as cinit +import pytest + + +def make_sql(path: Path) -> Path: + db = path / "test.db" + cinit._create_db(db) + return db + +def test_find_lookup_by_one_alias(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + assert uuid == 
find._project_lookup_by_alias(db, "fun_project") + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(db, "fun_project") + conn.close() From 8a8480af32a16c9bcdafea031f4951eeee3f8250 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 18:26:40 +0100 Subject: [PATCH 059/111] fix alias db --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index faef5db..4c51e05 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -28,7 +28,7 @@ def _project_lookup_by_alias(db: Path, alias: str) -> str: """ conn = sqlite3.connect(db) c = conn.cursor() - c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'") + c.execute(f"SELECT * FROM 'projects' WHERE aliases = '{alias}'") results = c.fetchall() conn.close() if len(results)>1: From bd34b7c3785ddb509f2fbfffbf01ce145ee0463d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 22:33:01 +0100 Subject: [PATCH 060/111] write first trivial find test --- tests/find_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/find_test.py b/tests/find_test.py index b63b246..8cc7923 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -10,6 +10,7 @@ def make_sql(path: Path) -> Path: cinit._create_db(db) return db + def test_find_lookup_by_one_alias(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) From f8566207e36978af1cefded2ae6c8fc521e732cd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 22:43:39 +0100 Subject: [PATCH 061/111] add id lookup test --- tests/find_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/find_test.py b/tests/find_test.py index 
8cc7923..e0730e9 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -32,3 +32,25 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: with pytest.raises(Exception): assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() + + +def test_find_lookup_by_id(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + conn.close() + result = find._project_lookup_by_id(db, uuid)[0] + assert uuid == result[0] + assert alias_str == result[1] + assert tag_str == result[2] + assert owner == result[3] + assert code == result[4] + From d0d5f9aa8775c4ce78f547d7cbe3dea5199743cb Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:37:22 +0100 Subject: [PATCH 062/111] rewrite time filter --- corrlib/find.py | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 4c51e05..e4ee735 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -8,6 +8,7 @@ from .tools import k2m, get_db_file from .tracker import get from typing import Any, Optional from pathlib import Path +import datetime as dt def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -62,8 +63,37 @@ def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: return results -def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, - created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: +def _time_filter(results: 
pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: + drops = [] + for ind in len(results): + result = results.iloc[ind] + created_at = dt.datetime.fromisoformat(result['created_at']) + updated_at = dt.datetime.fromisoformat(result['updated_at']) + + if created_before is not None: + created_before = dt.datetime.fromisoformat(created_before) + if created_before < created_at: + drops.append(ind) + continue + if created_after is not None: + created_after = dt.datetime.fromisoformat(created_after) + if created_before > created_at: + drops.append(ind) + continue + if updated_before is not None: + updated_before = dt.datetime.fromisoformat(updated_before) + if updated_before < updated_at: + drops.append(ind) + continue + if updated_after is not None: + updated_after = dt.datetime.fromisoformat(updated_after) + if updated_after > updated_at: + drops.append(ind) + continue + return results.drop(drops) + + +def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None) -> pd.DataFrame: """ Look up a correlator record in the database by the data given to the method. 
@@ -105,14 +135,6 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project search_expr += f" AND code = '{code}'" if parameters: search_expr += f" AND parameters = '{parameters}'" - if created_before: - search_expr += f" AND created_at < '{created_before}'" - if created_after: - search_expr += f" AND created_at > '{created_after}'" - if updated_before: - search_expr += f" AND updated_at < '{updated_before}'" - if updated_after: - search_expr += f" AND updated_at > '{updated_after}'" conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) conn.close() @@ -236,7 +258,9 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) - results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after) + results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) + if Any([created_before, created_after, updated_before, updated_after]): + results = _time_filter(results, created_before, created_after, updated_before, updated_after) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": From 29558a734b1522c94979858fb1ed0a12f8ed20d3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:38:40 +0100 Subject: [PATCH 063/111] add test for db lookup --- tests/find_test.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/find_test.py b/tests/find_test.py index e0730e9..da1bfc1 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -54,3 +54,42 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: assert owner == result[3] assert code == result[4] + +def test_db_lookup(tmp_path: Path) -> None: + db = make_sql(tmp_path) + 
conn = sqlite3.connect(db) + c = conn.cursor() + + corr = "f_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 5.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + conn.close() + + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert results.empty + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert results.empty + #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) + + #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) + + From 402ca07edbecda8bb5828596e98527c9ed2de8a4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:42:42 +0100 Subject: [PATCH 064/111] linting and hotfix --- corrlib/find.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index e4ee735..3e62344 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -65,32 +65,32 @@ def 
_project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: drops = [] - for ind in len(results): + for ind in range(len(results)): result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) if created_before is not None: - created_before = dt.datetime.fromisoformat(created_before) - if created_before < created_at: + date_created_before = dt.datetime.fromisoformat(created_before) + if date_created_before < created_at: drops.append(ind) continue if created_after is not None: - created_after = dt.datetime.fromisoformat(created_after) - if created_before > created_at: + date_created_after = dt.datetime.fromisoformat(created_after) + if date_created_after > created_at: drops.append(ind) continue if updated_before is not None: - updated_before = dt.datetime.fromisoformat(updated_before) - if updated_before < updated_at: + date_updated_before = dt.datetime.fromisoformat(updated_before) + if date_updated_before < updated_at: drops.append(ind) continue if updated_after is not None: - updated_after = dt.datetime.fromisoformat(updated_after) - if updated_after > updated_at: + date_updated_after = dt.datetime.fromisoformat(updated_after) + if date_updated_after > updated_at: drops.append(ind) continue - return results.drop(drops) + return results.drop(drops) def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None) -> pd.DataFrame: From b50ffc4c6b898de970e8ded6c2287c96a6c6389b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:45:22 +0100 Subject: [PATCH 065/111] any hotfix --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py 
b/corrlib/find.py index 3e62344..14b1772 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -259,7 +259,7 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) - if Any([created_before, created_after, updated_before, updated_after]): + if any(arg is not None for arg in [created_before, created_after, updated_before, updated_after]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) if code == "sfcf": results = sfcf_filter(results, **kwargs) From c431145a23764015d52ed6a1fd3da007d554cc3f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:23:30 +0100 Subject: [PATCH 066/111] some more db lookup --- tests/find_test.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index da1bfc1..e895b85 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -70,7 +70,6 @@ def test_db_lookup(tmp_path: Path) -> None: c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) conn.commit() - conn.close() results = find._db_lookup(db, ensemble, corr, code) assert len(results) == 1 @@ -88,8 +87,38 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") assert results.empty - #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) - #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, 
created_after, updated_before, updated_after) + corr = "g_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 4.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + corr = "f_A" + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert len(results) == 1 + conn.close() From 3fd557f3eebd2a57b9340b727a23f72586f6e68e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:24:12 +0100 Subject: [PATCH 067/111] add customtFilter --- corrlib/find.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 14b1772..8934854 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,9 +6,10 @@ import numpy as np from .input.implementations import codes from .tools 
import k2m, get_db_file from .tracker import get -from typing import Any, Optional +from typing import Any, Optional, Union from pathlib import Path import datetime as dt +from collections.abc import Callable def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -251,22 +252,31 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results.drop(drops) +def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: + return results + + def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, - created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: + created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, + revision: Optional[str]=None, + customFilter: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None, + **kwargs: Any) -> pd.DataFrame: db_file = get_db_file(path) db = path / db_file if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) - if any(arg is not None for arg in [created_before, created_after, updated_before, updated_after]): + if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) + if customFilter is not None: + results = customFilter(results) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": - pass + results = openQCD_filter(results, **kwargs) else: - raise Exception + raise ValueError(f"Code {code} is not known.") print("Found " + str(len(results)) + " result" 
+ ("s" if len(results)>1 else "")) return results.reset_index() From 3fe8e28a68a58a4cf8bce7a29d43f60286000c81 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:25:21 +0100 Subject: [PATCH 068/111] customtFilter after general filters --- corrlib/find.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 8934854..e099aea 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -269,14 +269,14 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) - if customFilter is not None: - results = customFilter(results) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": results = openQCD_filter(results, **kwargs) else: raise ValueError(f"Code {code} is not known.") + if customFilter is not None: + results = customFilter(results) print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() From 4516ca3149cac8b2f0420903c41576b471b7ed8f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:39:00 +0100 Subject: [PATCH 069/111] better type annotation fir id lookup --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index e099aea..dd3a9a6 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -40,7 +40,7 @@ def _project_lookup_by_alias(db: Path, alias: str) -> str: return str(results[0][0]) -def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: +def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: """ Return the project information available in the database by UUID. 
From 2fd46d452b84cf326d19d0a814a465631b5c8241 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:40:46 +0100 Subject: [PATCH 070/111] hotfix ensure that path is a Path --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index be80b6f..de19727 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path: The path holding the cached data for the given file. """ cache_path_list = file.split("/")[1:] - cache_path = path / CACHE_DIR + cache_path = Path(path) / CACHE_DIR for directory in cache_path_list: cache_path /= directory return cache_path From b8121811f967530f174202d5c67e72a0132295de Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:49:04 +0100 Subject: [PATCH 071/111] HOTFIX: hand over path as str --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index de19727..a87e227 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]: The data read from the file. 
""" get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(path / file) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) print("> read file") return filedict From 38b4983fed0af22231b27b57dc32a4efb121a63a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:50:30 +0100 Subject: [PATCH 072/111] HOTFIX: hand over path as str 2 --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a87e227..48017a1 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], cache_path(path, file, key)) + dump_object(preloaded[file][key], str(cache_path(path, file, key))) return returned_data From cc14e68b4429a122ee0c9b299555f4e7ca8fef45 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 26 Mar 2026 17:19:58 +0100 Subject: [PATCH 073/111] add tests for time filter and find project, add a first check for integrity of the database --- corrlib/find.py | 4 ++ corrlib/integrity.py | 5 ++ tests/find_test.py | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 corrlib/integrity.py diff --git a/corrlib/find.py b/corrlib/find.py index dd3a9a6..3cbe09b 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,6 +6,7 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get +from .integrity import check_time_validity from typing import Any, Optional, Union from pathlib import Path import datetime as dt @@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = 
dt.datetime.fromisoformat(result['updated_at']) + db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) + if not db_times_valid: + raise ValueError('Time stamps not valid for result with path', result["path"]) if created_before is not None: date_created_before = dt.datetime.fromisoformat(created_before) diff --git a/corrlib/integrity.py b/corrlib/integrity.py new file mode 100644 index 0000000..bf890db --- /dev/null +++ b/corrlib/integrity.py @@ -0,0 +1,5 @@ +import datetime as dt + + +def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: + return not (created_at > updated_at) diff --git a/tests/find_test.py b/tests/find_test.py index e895b85..573f87e 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -3,6 +3,8 @@ import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest +import pandas as pd +import datalad.api as dl def make_sql(path: Path) -> Path: @@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: conn.close() +def test_find_project(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + assert uuid == find.find_project(tmp_path, "fun_project") + + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(tmp_path, 
"fun_project") + conn.close() + + def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 conn.close() + + +def test_time_filter() -> None: + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later + record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... 
+ + data = [record_A, record_B, record_C, record_D, record_E] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') + 
assert results.empty + results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + data = [record_A, record_B, record_C, record_D, record_F] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.raises(ValueError): + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') From 81af9579dcad49d0b5c3095b0d467cf49d2282e6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 26 Mar 2026 17:25:57 +0100 Subject: [PATCH 074/111] add a docstring for time filter --- corrlib/find.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/corrlib/find.py b/corrlib/find.py index 3cbe09b..cb85130 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -66,6 +66,22 @@ def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: + """ + Filter the results from the database in terms of the creation and update times. + + Parameters + ---------- + results: pd.DataFrame + The dataframe holding the unfilteres results from the database. + created_before: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly before the date and time given. + created_after: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly after the date and time given. 
+ updated_before: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly before the date and time given. + updated_after: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly after the date and time given. + """ drops = [] for ind in range(len(results)): result = results.iloc[ind] From e8360c88b938cbd3636b8b8cfa30ce0b3375e7ed Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:53:07 +0100 Subject: [PATCH 075/111] add more templates --- tests/find_test.py | 278 +++++++++++++++++++++++++++++---------------- 1 file changed, 180 insertions(+), 98 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index 573f87e..944ae5f 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -35,35 +35,6 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() - -def test_find_project(tmp_path: Path) -> None: - cinit.create(tmp_path) - db = tmp_path / "backlogger.db" - dl.unlock(str(db), dataset=str(tmp_path)) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - assert uuid == find.find_project(tmp_path, "fun_project") - - uuid = "test_uuid2" - alias_str = "fun_project" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - with pytest.raises(Exception): - assert uuid == find._project_lookup_by_alias(tmp_path, 
"fun_project") - conn.close() - - def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -85,75 +56,6 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: assert code == result[4] -def test_db_lookup(tmp_path: Path) -> None: - db = make_sql(tmp_path) - conn = sqlite3.connect(db) - c = conn.cursor() - - corr = "f_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 5.0}" - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert results.empty - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") - assert results.empty - - corr = "g_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 4.0}" - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), 
datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - corr = "f_A" - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") - assert len(results) == 1 - - conn.close() - - def test_time_filter() -> None: record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created @@ -238,3 +140,183 @@ def test_time_filter() -> None: with pytest.raises(ValueError): results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + + +def test_db_lookup(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + + corr = "f_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 5.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, 
?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert results.empty + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert results.empty + + corr = "g_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 4.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + + corr = "f_A" + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert 
results.empty + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert len(results) == 1 + + conn.close() + + +def test_sfcf_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_4 = [] + record_5 = [] + record_6 = [] + record_7 = [] + record_8 = [] + record_9 = [] + data = [ + record_0, + record_1, + record_2, + record_3, + record_4, + record_5, + record_6, + record_7, + record_8, + record_9, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + assert True + + +def test_openQCD_filter() -> None: + assert True + + +def test_find_record() -> None: + assert True + + +def test_find_project(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + 
alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + assert uuid == find.find_project(tmp_path, "fun_project") + + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project") + conn.close() + + +def test_list_projects(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid2" + alias_str = "fun_project2" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid3" + alias_str = "fun_project3" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid4" + alias_str = "fun_project4" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, 
owner, code)) + conn.commit() + conn.close() + results = find.list_projects(tmp_path) + assert len(results) == 4 + for i in range(4): + assert len(results[i]) == 2 From 1a1ac5121dbd623513bfaca70de0aa829352029c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:53:39 +0100 Subject: [PATCH 076/111] restructure: make code filter --- corrlib/find.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index cb85130..9b2c201 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -276,6 +276,15 @@ def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results +def _code_filter(results: pd.DataFrame, code: str, **kwargs: Any) -> pd.DataFrame: + if code == "sfcf": + return sfcf_filter(results, **kwargs) + elif code == "openQCD": + return openQCD_filter(results, **kwargs) + else: + raise ValueError(f"Code {code} is not known.") + + def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, @@ -289,12 +298,7 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) - if code == "sfcf": - results = sfcf_filter(results, **kwargs) - elif code == "openQCD": - results = openQCD_filter(results, **kwargs) - else: - raise ValueError(f"Code {code} is not known.") + results = _code_filter(results, code, **kwargs) if customFilter is not None: results = customFilter(results) print("Found " + str(len(results)) + " result" + ("s" 
if len(results)>1 else "")) From f98521b5a1f6839d1624579186677ee8ce5cbf5d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:56:40 +0100 Subject: [PATCH 077/111] HOTFIX: strings for pyerrors 3 --- corrlib/meas_io.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 48017a1..0f9ac02 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -59,7 +59,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str if file not in files_to_save: unlock(path, file_in_archive) files_to_save.append(file_in_archive) - known_meas = pj.load_json_dict(file, verbose=False) + known_meas = pj.load_json_dict(str(file), verbose=False) if code == "sfcf": if parameter_file is not None: parameters = sfcf.read_param(path, uuid, parameter_file) @@ -113,7 +113,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() - pj.dump_dict_to_json(known_meas, file) + pj.dump_dict_to_json(known_meas, str(file)) conn.close() save(path, message="Add measurements to database", files=files_to_save) return @@ -269,11 +269,11 @@ def drop_record(path: Path, meas_path: str) -> None: raise ValueError("This measurement does not exist as an entry!") conn.commit() - known_meas = pj.load_json_dict(file) + known_meas = pj.load_json_dict(str(file)) if sub_key in known_meas: del known_meas[sub_key] unlock(path, Path(file_in_archive)) - pj.dump_dict_to_json(known_meas, file) + pj.dump_dict_to_json(known_meas, str(file)) save(path, message="Drop measurements to database", files=[db, file]) return else: From 4673751dc3fd56dcb9776fb8d79e2d5b60f9e4b2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 7 
Apr 2026 11:29:10 +0200 Subject: [PATCH 078/111] add docstrings for openQCD filter --- corrlib/find.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/corrlib/find.py b/corrlib/find.py index 9b2c201..d368973 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -273,10 +273,43 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: + """ + Filter for parameters of openQCD. + + Parameters + ---------- + results: pd.DataFrame + The unfiltered list of results from the database. + + Returns + ------- + results: pd.DataFrame + The filtered results. + + """ return results def _code_filter(results: pd.DataFrame, code: str, **kwargs: Any) -> pd.DataFrame: + """ + Abstraction of the filters for the different codes that are available. + At the moment, only openQCD and SFCF are known. + The possible key words for the parameters can be seen in the descriptionso f the code-specific filters. + + Parameters + ---------- + results: pd.DataFrame + The unfiltered list of results from the database. + code: str + The name of the code that produced the record at hand. + kwargs: + The keyworkd args that are handed over to the code-specific filters. + + Returns + ------- + results: pd.DataFrame + The filtered results. 
+ """ if code == "sfcf": return sfcf_filter(results, **kwargs) elif code == "openQCD": From 8db8d46a06c76bed244bcd9df374c6060d1886ff Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 7 Apr 2026 11:40:48 +0200 Subject: [PATCH 079/111] add very simple tests or code filter and openQCD filter, fix json par strings --- tests/find_test.py | 127 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 25 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index 944ae5f..156e5fe 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -57,17 +57,17 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created - record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + 
record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... data = [record_A, record_B, record_C, record_D, record_E] @@ -172,7 +172,7 @@ def test_db_lookup(tmp_path: Path) -> None: assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + results = find._db_lookup(db, ensemble, corr, code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert results.empty corr = "g_A" @@ -180,7 +180,7 @@ def test_db_lookup(tmp_path: Path) -> None: code = "openQCD" meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 4.0}" + pars = '{"par_A": 3.0, "par_B": 4.0}' parameter_file = "projects/Project_A/myinput.in" c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) @@ 
-205,38 +205,26 @@ def test_db_lookup(tmp_path: Path) -> None: assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + results = find._db_lookup(db, ensemble, "g_A", code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert len(results) == 1 conn.close() def test_sfcf_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_4 = [] - record_5 = [] - record_6 = [] - record_7 = [] - record_8 = [] - record_9 = 
[] data = [ record_0, record_1, record_2, record_3, - record_4, - record_5, - record_6, - record_7, - record_8, - record_9, ] cols = ["name", "ensemble", @@ -253,9 +241,98 @@ def test_sfcf_filter() -> None: def test_openQCD_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + find.openQCD_filter(df) assert True +def test_code_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', 
"projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_4 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_5 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_6 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_7 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_8 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + res = find._code_filter(df, "sfcf") + assert len(res) == 4 + + data = [ + record_4, + record_5, + record_6, + record_7, + record_8, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + res = find._code_filter(df, "openQCD") + 
assert len(res) == 5 + with pytest.raises(ValueError): + res = find._code_filter(df, "asdf") + + def test_find_record() -> None: assert True From 3a1e41808b00763ec270ef32700a9ba45fcf74ee Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 8 Apr 2026 17:26:38 +0200 Subject: [PATCH 080/111] correct minor typos in doc --- corrlib/find.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index d368973..660e4bf 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -179,9 +179,9 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: qk2: float, optional Mass parameter $\kappa_2$ of the first quark. qm1: float, optional - Bare quak mass $m_1$ of the first quark. + Bare quark mass $m_1$ of the first quark. qm2: float, optional - Bare quak mass $m_1$ of the first quark. + Bare quark mass $m_2$ of the first quark. quarks_thetas: list[list[float]], optional wf1: optional wf2: optional From 7275fdd4f33c67d0927ca65990e59865ce77cac2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 8 Apr 2026 17:28:54 +0200 Subject: [PATCH 081/111] remove unnecessary output when results are empty --- corrlib/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/corrlib/cli.py b/corrlib/cli.py index b28692a..6c1c3c5 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -108,6 +108,8 @@ def find( Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. 
""" results = find_record(path, ensemble, corr, code) + if results.empty: + return if arg == 'all': print(results) else: From 5c37c06b1356ab7c53dfdc1524470d5cd4df330b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 09:54:39 +0200 Subject: [PATCH 082/111] add an implementation to read the first ~200 bytes of the par file of openQCD's qcd2 --- corrlib/input/openQCD.py | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index a3bce6f..9c5fbbc 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -4,6 +4,7 @@ import os import fnmatch from typing import Any, Optional from pathlib import Path +import struct def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: @@ -304,3 +305,58 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A t1_dict[param["type"]] = {} t1_dict[param["type"]][pars] = t0 return t1_dict + + +def read_par_file(fname: str) -> dict[str, dict[str, Any]]: + + def _qcd2_write_lat_parms() -> dict[str, Any]: + lat_pars = {} + + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] + kappas = [] + m0s = [] + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + bc_pars = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + 
bc_pars["cG"] = list(bc_parms[:2]) + bc_pars["cF"] = list(bc_parms[2:4]) + phi = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict From 5ea832675702ae6715b07215de120315657ca03d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 10:26:47 +0200 Subject: [PATCH 083/111] add thin wrapper to accomodate for input conventions, add comments --- corrlib/input/openQCD.py | 62 +++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 9c5fbbc..1d36e7f 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -307,27 +307,42 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A return t1_dict -def read_par_file(fname: str) -> dict[str, dict[str, Any]]: +def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines written here have names according to the openQCD programs and functions that write out the data. + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + Unpack the lattice parameters written by write_lat_parms. 
+ """ lat_pars = {} - t = fp.read(16) lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends t = fp.read(8) - nk, isw = struct.unpack('ii', t) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter lat_pars["nk"] = nk lat_pars["isw"] = isw t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] + lat_pars["beta"] = struct.unpack('d', t)[0] # beta t = fp.read(8) lat_pars["c0"] = struct.unpack('d', t)[0] t = fp.read(8) lat_pars["c1"] = struct.unpack('d', t)[0] t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor kappas = [] m0s = [] + # read kappas for ik in range(nk): t = fp.read(8) kappas.append(struct.unpack('d', t)[0]) @@ -338,14 +353,17 @@ def read_par_file(fname: str) -> dict[str, dict[str, Any]]: return lat_pars def _qcd2_write_bc_parms() -> dict[str, Any]: - bc_pars = {} + """ + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries t = fp.read(104) bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) - bc_pars["cF"] = list(bc_parms[2:4]) - phi = [[], []] + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] phi[0] = list(bc_parms[4:7]) phi[1] = list(bc_parms[7:10]) bc_pars["phi"] = phi @@ -360,3 +378,27 @@ def read_par_file(fname: str) -> dict[str, dict[str, Any]]: par_dict["lat"] = lat_par_dict par_dict["bc"] = bc_par_dict return par_dict + + +def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: + """ + Thin wrapper around read_qcd2_par_file, getting the file before reading. + + Parameters + ---------- + path: Path + Path of the corrlib repository. + project: str + UUID of the project of the parameter-file. 
+ file_in_project: str + The loaction of the file in the project directory. + + Returns + ------- + par_dict: dict + The dict with the parameters read from the .par-file. + """ + fname = path / "projects" / project / file_in_project + ds = os.path.join(path, "projects", project) + dl.get(fname, dataset=ds) + return read_qcd2_par_file(fname) From 71332264cf36ad27c9a9d840c7f14c86ba7835ce Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 10:47:19 +0200 Subject: [PATCH 084/111] restruct: give each openQCD prog it's own file --- corrlib/input/openQCD.py | 103 +++++++++---------------------- corrlib/pars/openQCD/__init__.py | 3 + corrlib/pars/openQCD/ms1.py | 81 ++++++++++++++++++++++++ corrlib/pars/openQCD/qcd2.py | 77 +++++++++++++++++++++++ 4 files changed, 189 insertions(+), 75 deletions(-) create mode 100644 corrlib/pars/openQCD/__init__.py create mode 100644 corrlib/pars/openQCD/ms1.py create mode 100644 corrlib/pars/openQCD/qcd2.py diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 1d36e7f..8a2b41e 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -4,7 +4,9 @@ import os import fnmatch from typing import Any, Optional from pathlib import Path -import struct +from ..pars.openQCD import ms1 +from ..pars.openQCD import qcd2 + def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: @@ -307,79 +309,6 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A return t1_dict -def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: - """ - The subroutines written here have names according to the openQCD programs and functions that write out the data. - - Parameters - ---------- - fname: Path - Location of the parameter file. - - Returns - ------- - par_dict: dict - Dictionary holding the parameters specified in the given file. - """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - Unpack the lattice parameters written by write_lat_parms. 
- """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - Unpack the boundary parameters written by write_bc_parms. - """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars - - with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() - fp.close() - par_dict = {} - par_dict["lat"] = lat_par_dict - par_dict["bc"] = bc_par_dict - return par_dict - - def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Thin wrapper around read_qcd2_par_file, getting the file before reading. 
@@ -401,4 +330,28 @@ def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, fname = path / "projects" / project / file_in_project ds = os.path.join(path, "projects", project) dl.get(fname, dataset=ds) - return read_qcd2_par_file(fname) + return qcd2.read_qcd2_par_file(fname) + + +def load_ms1_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: + """ + Thin wrapper around read_qcd2_ms1_par_file, getting the file before reading. + + Parameters + ---------- + path: Path + Path of the corrlib repository. + project: str + UUID of the project of the parameter-file. + file_in_project: str + The loaction of the file in the project directory. + + Returns + ------- + par_dict: dict + The dict with the parameters read from the .par-file. + """ + fname = path / "projects" / project / file_in_project + ds = os.path.join(path, "projects", project) + dl.get(fname, dataset=ds) + return ms1.read_qcd2_ms1_par_file(fname) diff --git a/corrlib/pars/openQCD/__init__.py b/corrlib/pars/openQCD/__init__.py new file mode 100644 index 0000000..edbac71 --- /dev/null +++ b/corrlib/pars/openQCD/__init__.py @@ -0,0 +1,3 @@ + +from . import ms1 as ms1 +from . import qcd2 as qcd2 diff --git a/corrlib/pars/openQCD/ms1.py b/corrlib/pars/openQCD/ms1.py new file mode 100644 index 0000000..9aabc54 --- /dev/null +++ b/corrlib/pars/openQCD/ms1.py @@ -0,0 +1,81 @@ +import struct + +from typing import Any +from pathlib import Path + + +def read_qcd2_ms1_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines written here have names according to the openQCD programs and functions that write out the data. + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ + + def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the lattice parameters written by write_lat_parms. 
+ """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict + + diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py new file mode 100644 index 0000000..9d63689 --- /dev/null +++ b/corrlib/pars/openQCD/qcd2.py @@ -0,0 +1,77 @@ +import struct + +from pathlib import Path +from typing import Any + + +def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines 
written here have names according to the openQCD programs and functions that write out the data. + + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ + def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + Unpack the lattice parameters written by write_lat_parms. + """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + """ + Unpack the boundary parameters written by write_bc_parms. 
+ """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict From e654d7c1bb6b4948eb43037977142358c37dcd7b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:10:54 +0200 Subject: [PATCH 085/111] restruct: introduce a file for flags --- corrlib/pars/openQCD/flags.py | 59 +++++++++++++++++++++++++++++++++++ corrlib/pars/openQCD/ms1.py | 57 ++------------------------------- corrlib/pars/openQCD/qcd2.py | 55 ++------------------------------ 3 files changed, 65 insertions(+), 106 deletions(-) create mode 100644 corrlib/pars/openQCD/flags.py diff --git a/corrlib/pars/openQCD/flags.py b/corrlib/pars/openQCD/flags.py new file mode 100644 index 0000000..2e4ab17 --- /dev/null +++ b/corrlib/pars/openQCD/flags.py @@ -0,0 +1,59 @@ +""" +Reconstruct the outputs of flags. +""" + +import struct +from typing import Any, BinaryIO + +# lat_parms.c +def lat_parms_write_lat_parms(fp: BinaryIO) -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the lattice parameters written by write_lat_parms. 
+ """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + +def lat_parms_write_bc_parms(fp: BinaryIO) -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars \ No newline at end of file diff --git a/corrlib/pars/openQCD/ms1.py b/corrlib/pars/openQCD/ms1.py index 9aabc54..4c2aed5 100644 --- a/corrlib/pars/openQCD/ms1.py +++ b/corrlib/pars/openQCD/ms1.py @@ -1,4 +1,4 @@ -import struct +from . import flags from typing import Any from pathlib import Path @@ -18,60 +18,9 @@ def read_qcd2_ms1_par_file(fname: Path) -> dict[str, dict[str, Any]]: Dictionary holding the parameters specified in the given file. """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - NOTE: This is a duplcation from qcd2. 
- Unpack the lattice parameters written by write_lat_parms. - """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - NOTE: This is a duplcation from qcd2. - Unpack the boundary parameters written by write_bc_parms. - """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars - with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() + lat_par_dict = flags.lat_parms_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py index 9d63689..3b6e277 100644 --- a/corrlib/pars/openQCD/qcd2.py +++ b/corrlib/pars/openQCD/qcd2.py @@ -1,9 +1,8 @@ -import struct +from . 
import flags from pathlib import Path from typing import Any - def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ The subroutines written here have names according to the openQCD programs and functions that write out the data. @@ -18,58 +17,10 @@ def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: par_dict: dict Dictionary holding the parameters specified in the given file. """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - Unpack the lattice parameters written by write_lat_parms. - """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - Unpack the boundary parameters written by write_bc_parms. 
- """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() + lat_par_dict = flags.lat_parms_qcd2_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_qcd2_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict From 9498c1dd735ea498207eedafe4cb37f458c008c2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:11:48 +0200 Subject: [PATCH 086/111] correct function names --- corrlib/pars/openQCD/qcd2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py index 3b6e277..e73c156 100644 --- a/corrlib/pars/openQCD/qcd2.py +++ b/corrlib/pars/openQCD/qcd2.py @@ -3,6 +3,7 @@ from . import flags from pathlib import Path from typing import Any + def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ The subroutines written here have names according to the openQCD programs and functions that write out the data. 
@@ -19,8 +20,8 @@ def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ with open(fname, "rb") as fp: - lat_par_dict = flags.lat_parms_qcd2_write_lat_parms(fp) - bc_par_dict = flags.lat_parms_qcd2_write_bc_parms(fp) + lat_par_dict = flags.lat_parms_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict From 8394b1fdbdaeaf0e647b7e568c9338f0a4a3a166 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:23:28 +0200 Subject: [PATCH 087/111] rename functions, let write_measurement decide which file type is given --- corrlib/input/openQCD.py | 6 +++--- corrlib/meas_io.py | 7 +++++-- corrlib/toml.py | 10 +++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 8a2b41e..879b555 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -9,7 +9,7 @@ from ..pars.openQCD import qcd2 -def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms1_infile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms1 measurements from a parameter file in the project. @@ -73,7 +73,7 @@ def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, return param -def read_ms3_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms3_infile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms3 measurements from a parameter file in the project. @@ -333,7 +333,7 @@ def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, return qcd2.read_qcd2_par_file(fname) -def load_ms1_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms1_parfile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Thin wrapper around read_qcd2_ms1_par_file, getting the file before reading. 
diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 0f9ac02..a48f546 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -74,7 +74,10 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str ms_type = list(measurement.keys())[0] if ms_type == 'ms1': if parameter_file is not None: - parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + if parameter_file.endswith(".ms1.in"): + parameters = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + parameters = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: raise Exception("Need parameter file for this code!") pars = {} @@ -88,7 +91,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str pars[subkey] = json.dumps(parameters["rw_fcts"][i]) elif ms_type in ['t0', 't1']: if parameter_file is not None: - parameters = openQCD.read_ms3_param(path, uuid, parameter_file) + parameters = openQCD.load_ms3_infile(path, uuid, parameter_file) else: parameters = {} for rwp in ["integrator", "eps", "ntot", "dnms"]: diff --git a/corrlib/toml.py b/corrlib/toml.py index add3739..93ba0f3 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -192,12 +192,16 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': - param = openQCD.read_ms1_param(path, uuid, md['param_file']) + parameter_file = md['param_file'] + if parameter_file.endswith(".ms1.in"): + param = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + param = openQCD.load_ms1_parfile(path, uuid, parameter_file) param['type'] = 'ms1' measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files']) elif md['measurement'] == 't0': if 'param_file' in md: - param = openQCD.read_ms3_param(path, uuid, md['param_file']) + param = 
openQCD.load_ms3_infile(path, uuid, md['param_file']) else: param = {} for rwp in ["integrator", "eps", "ntot", "dnms"]: @@ -207,7 +211,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) elif md['measurement'] == 't1': if 'param_file' in md: - param = openQCD.read_ms3_param(path, uuid, md['param_file']) + param = openQCD.load_ms3_infile(path, uuid, md['param_file']) param['type'] = 't1' measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) From 26607632328e4615fce343f7d700baddaad9fdb2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:24:25 +0200 Subject: [PATCH 088/111] lint --- corrlib/pars/openQCD/flags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/pars/openQCD/flags.py b/corrlib/pars/openQCD/flags.py index 2e4ab17..95be919 100644 --- a/corrlib/pars/openQCD/flags.py +++ b/corrlib/pars/openQCD/flags.py @@ -56,4 +56,4 @@ def lat_parms_write_bc_parms(fp: BinaryIO) -> dict[str, Any]: phi[1] = list(bc_parms[7:10]) bc_pars["phi"] = phi bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars \ No newline at end of file + return bc_pars From 3b6a8be0cc1d1ed1e64b014d7b88d96c2dcc446f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:12:38 +0200 Subject: [PATCH 089/111] TEMPFIX: allow ms1 to not have an in or par file --- corrlib/meas_io.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a48f546..82f2e7d 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -79,7 +79,19 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, 
dict[str elif parameter_file.endswith(".ms1.par"): parameters = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: - raise Exception("Need parameter file for this code!") + # Temporary solution + parameters = {} + parameters["rand"] = {} + for nrw in range(1): + if "nsrc" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["nsrc"] = 1 + if "mu" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["mu"] = "None" + if "np" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["np"] = "None" + if "irp" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["irp"] = "None" + # raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): From 9b628abc69f0ac44f5c2fd392be1da1c48a85d6c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:16:11 +0200 Subject: [PATCH 090/111] TEMPFIX: allow ms1 to not have an in or par file part 2 --- corrlib/toml.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index 93ba0f3..14ec058 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -192,11 +192,26 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': - parameter_file = md['param_file'] - if parameter_file.endswith(".ms1.in"): - param = openQCD.load_ms1_infile(path, uuid, parameter_file) - elif parameter_file.endswith(".ms1.par"): - param = openQCD.load_ms1_parfile(path, uuid, parameter_file) + if 'param_file' in md.keys(): + parameter_file = md['param_file'] + if parameter_file.endswith(".ms1.in"): + param = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + param = openQCD.load_ms1_parfile(path, uuid, parameter_file) + else: + # Temporary solution + parameters = {} + parameters["rand"] = {} + for nrw in range(1): + if "nsrc" not in 
parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["nsrc"] = 1 + if "mu" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["mu"] = "None" + if "np" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["np"] = "None" + if "irp" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["irp"] = "None" + param = parameters param['type'] = 'ms1' measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files']) elif md['measurement'] == 't0': From 16dcca3f3dea23022e56e20e03b7ff9c14ed04bc Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:17:29 +0200 Subject: [PATCH 091/111] TEMPFIX: allow ms1 to not have an in or par file part 3 --- corrlib/toml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/corrlib/toml.py b/corrlib/toml.py index 14ec058..f0ba525 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,6 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: # Temporary solution parameters = {} parameters["rand"] = {} + parameters["rw_fcts"] = [] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 From e3be65beec6c7bbb4fef67098c51ebe502bde125 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:18:59 +0200 Subject: [PATCH 092/111] TEMPFIX: allow ms1 to not have an in or par file part 4 --- corrlib/meas_io.py | 2 +- corrlib/toml.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 82f2e7d..f4e8a83 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -82,6 +82,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str # Temporary solution parameters = {} parameters["rand"] = {} + parameters["rw_fcts"] = [{}] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 @@ -91,7 +92,6 @@ def 
write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str parameters["rw_fcts"][nrw]["np"] = "None" if "irp" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["irp"] = "None" - # raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): diff --git a/corrlib/toml.py b/corrlib/toml.py index f0ba525..eb40d5a 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,7 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: # Temporary solution parameters = {} parameters["rand"] = {} - parameters["rw_fcts"] = [] + parameters["rw_fcts"] = [{}] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 From e95edcb0932815e352011da5a69f297a92bfedd1 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 23:27:31 +0200 Subject: [PATCH 093/111] restruct for easier tests, test drop of sfcf params --- corrlib/find.py | 149 +++++++++++++++++++++++---------------------- tests/find_test.py | 93 +++++++++++++--------------- 2 files changed, 116 insertions(+), 126 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 660e4bf..9d07a1c 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -162,6 +162,78 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project return results +def _sfcf_drop(param, **kwargs): + if 'offset' in kwargs: + if kwargs.get('offset') != param['offset']: + return True + if 'quark_kappas' in kwargs: + kappas = kwargs['quark_kappas'] + if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): + return True + if 'quark_masses' in kwargs: + masses = kwargs['quark_masses'] + if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): + return True + if 'qk1' in kwargs: + quark_kappa1 = kwargs['qk1'] + if not 
isinstance(quark_kappa1, list): + if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): + return True + else: + if len(quark_kappa1) == 2: + if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): + return True + else: + raise ValueError("quark_kappa1 has to have length 2") + if 'qk2' in kwargs: + quark_kappa2 = kwargs['qk2'] + if not isinstance(quark_kappa2, list): + if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): + return True + else: + if len(quark_kappa2) == 2: + if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): + return True + else: + raise ValueError("quark_kappa2 has to have length 2") + if 'qm1' in kwargs: + quark_mass1 = kwargs['qm1'] + if not isinstance(quark_mass1, list): + if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): + return True + else: + if len(quark_mass1) == 2: + if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): + return True + else: + raise ValueError("quark_mass1 has to have length 2") + if 'qm2' in kwargs: + quark_mass2 = kwargs['qm2'] + if not isinstance(quark_mass2, list): + if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): + return True + else: + if len(quark_mass2) == 2: + if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): + return True + else: + raise ValueError("quark_mass2 has to have length 2") + if 'quark_thetas' in kwargs: + quark_thetas = kwargs['quark_thetas'] + if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): + return True + # careful, this is not save, when multiple contributions are present! 
+ if 'wf1' in kwargs: + wf1 = kwargs['wf1'] + if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): + return True + if 'wf2' in kwargs: + wf2 = kwargs['wf2'] + if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): + return True + return False + + def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: r""" Filter method for the Database entries holding SFCF calculations. @@ -191,84 +263,13 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: results: pd.DataFrame The filtered DataFrame, only holding the records that fit to the parameters given. """ + drops = [] for ind in range(len(results)): result = results.iloc[ind] param = json.loads(result['parameters']) - if 'offset' in kwargs: - if kwargs.get('offset') != param['offset']: - drops.append(ind) - continue - if 'quark_kappas' in kwargs: - kappas = kwargs['quark_kappas'] - if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_masses' in kwargs: - masses = kwargs['quark_masses'] - if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - if 'qk1' in kwargs: - quark_kappa1 = kwargs['qk1'] - if not isinstance(quark_kappa1, list): - if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): - drops.append(ind) - continue - else: - if len(quark_kappa1) == 2: - if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): - drops.append(ind) - continue - if 'qk2' in kwargs: - quark_kappa2 = kwargs['qk2'] - if not isinstance(quark_kappa2, list): - if (not np.isclose(quark_kappa2, 
param['quarks'][1]['mass'])): - drops.append(ind) - continue - else: - if len(quark_kappa2) == 2: - if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): - drops.append(ind) - continue - if 'qm1' in kwargs: - quark_mass1 = kwargs['qm1'] - if not isinstance(quark_mass1, list): - if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass1) == 2: - if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): - drops.append(ind) - continue - if 'qm2' in kwargs: - quark_mass2 = kwargs['qm2'] - if not isinstance(quark_mass2, list): - if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass2) == 2: - if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_thetas' in kwargs: - quark_thetas = kwargs['quark_thetas'] - if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): - drops.append(ind) - continue - # careful, this is not save, when multiple contributions are present! 
- if 'wf1' in kwargs: - wf1 = kwargs['wf1'] - if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): - drops.append(ind) - continue - if 'wf2' in kwargs: - wf2 = kwargs['wf2'] - if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): - drops.append(ind) - continue + if _sfcf_drop(param, **kwargs): + drops.append(ind) return results.drop(drops) diff --git a/tests/find_test.py b/tests/find_test.py index 156e5fe..36d687e 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -211,62 +211,51 @@ def test_db_lookup(tmp_path: Path) -> None: conn.close() -def test_sfcf_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) +def test_sfcf_drop() -> None: + parameters0 = { + 'offset': 
[0,0,0], + 'quarks': [{'mass': 1, 'thetas': [0,0,0]}, {'mass': 2, 'thetas': [0,0,1]}], # m0s = -3.5, -3.75 + 'wf1': [[1, [0, 0]], [0.5, [1, 0]], [.75, [.5, .5]]], + 'wf2': [[1, [2, 1]], [2, [0.5, -0.5]], [.5, [.75, .72]]], + } - assert True + assert not find._sfcf_drop(parameters0, offset=[0,0,0]) + assert find._sfcf_drop(parameters0, offset=[1,0,0]) + + assert not find._sfcf_drop(parameters0, quark_kappas = [1, 2]) + assert find._sfcf_drop(parameters0, quark_kappas = [-3.1, -3.72]) + + assert not find._sfcf_drop(parameters0, quark_masses = [-3.5, -3.75]) + assert find._sfcf_drop(parameters0, quark_masses = [-3.1, -3.72]) + + assert not find._sfcf_drop(parameters0, qk1 = 1) + assert not find._sfcf_drop(parameters0, qk2 = 2) + assert find._sfcf_drop(parameters0, qk1 = 2) + assert find._sfcf_drop(parameters0, qk2 = 1) + + assert not find._sfcf_drop(parameters0, qk1 = [0.5,1.5]) + assert not find._sfcf_drop(parameters0, qk2 = [1.5,2.5]) + assert find._sfcf_drop(parameters0, qk1 = 2) + assert find._sfcf_drop(parameters0, qk2 = 1) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qk1 = [0.5,1,5]) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qk2 = [1,5,2.5]) + + assert find._sfcf_drop(parameters0, qm1 = 1.2) + assert find._sfcf_drop(parameters0, qm2 = 2.2) + assert not find._sfcf_drop(parameters0, qm1 = -3.5) + assert not find._sfcf_drop(parameters0, qm2 = -3.75) + + assert find._sfcf_drop(parameters0, qm2 = 1.2) + assert find._sfcf_drop(parameters0, qm1 = 2.2) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qm1 = [0.5,1,5]) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qm2 = [1,5,2.5]) def test_openQCD_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", 
"ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - find.openQCD_filter(df) assert True From 6e886aa06d638a9af4b32f6351d0534e65f70c99 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 09:16:46 +0200 Subject: [PATCH 094/111] add counter for measurements in tomls --- corrlib/toml.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index eb40d5a..e2e257d 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -178,8 +178,10 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: update_aliases(path, uuid, aliases) else: uuid = import_project(path, project['url'], aliases=aliases) + imeas = 1 + nmeas = len(measurements.keys()) for mname, md in measurements.items(): - print("Import measurement: " + mname) + print(f"Import measurement {imeas}/{nmeas}: {mname}") ensemble = md['ensemble'] if project['code'] == 'sfcf': param = sfcf.read_param(path, uuid, md['param_file']) @@ -232,6 +234,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), 
int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) + imeas += 1 print(mname + " imported.") if not os.path.exists(path / "toml_imports" / uuid): From 74d99f8d5f4a9f24339bf04cf9bdac1e37a3d6d8 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 09:38:31 +0200 Subject: [PATCH 095/111] fix mypy issue --- corrlib/toml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index e2e257d..0d4dfc8 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,7 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: param = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: # Temporary solution - parameters = {} + parameters: dict[str, Any] = {} parameters["rand"] = {} parameters["rw_fcts"] = [{}] for nrw in range(1): From 6d1f8f7f1baa7efc26b0964af041af97347cb491 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 10:28:28 +0200 Subject: [PATCH 096/111] add NotImplemented warning for openQCD filter --- corrlib/find.py | 5 ++++- tests/find_test.py | 28 +++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 9d07a1c..1c985e2 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -11,6 +11,7 @@ from typing import Any, Optional, Union from pathlib import Path import datetime as dt from collections.abc import Callable +import warnings def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -162,7 +163,7 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project return results -def _sfcf_drop(param, **kwargs): +def _sfcf_drop(param: dict[str, Any], **kwargs: Any) -> bool: if 'offset' in kwargs: if kwargs.get('offset') != param['offset']: 
return True @@ -288,6 +289,8 @@ def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: The filtered results. """ + warnings.warn("A filter for openQCD parameters is no implemented yet.", Warning) + return results diff --git a/tests/find_test.py b/tests/find_test.py index 36d687e..f512f15 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -256,7 +256,33 @@ def test_sfcf_drop() -> None: def test_openQCD_filter() -> None: - assert True + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.warns(Warning): + find.openQCD_filter(df, a = "asdf") def test_code_filter() -> None: From 91938c3c5a3f590ad48d471e0a19a8702ba94349 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 14:17:41 +0200 Subject: [PATCH 097/111] add second time integrity check --- corrlib/integrity.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py 
index bf890db..f1459d0 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -2,4 +2,9 @@ import datetime as dt def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: - return not (created_at > updated_at) + # we expect created_at <= updated_at <= now + if created_at > updated_at: + return False + if updated_at > dt.datetime.now(): + return False + return True From 0b8c041ee559af903d6aa1526ed1a59753ab775d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 15:34:05 +0200 Subject: [PATCH 098/111] add wrapper functions to check for the validity of the database --- corrlib/integrity.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index f1459d0..db242f6 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -1,10 +1,32 @@ import datetime as dt +from pathlib import Path +from .tools import get_db_file +import pandas as pd +import sqlite3 -def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: +def has_valid_times(result: pd.DataFrame) -> bool: # we expect created_at <= updated_at <= now + created_at = dt.datetime.fromisoformat(result['created_at']) + updated_at = dt.datetime.fromisoformat(result['updated_at']) if created_at > updated_at: return False if updated_at > dt.datetime.now(): return False return True + + +def check_db_integrity(path: Path) -> None: + db = get_db_file(path) + search_expr = "SELECT * FROM 'backlogs'" + conn = sqlite3.connect(db) + results = pd.read_sql(search_expr, conn) + + for result in results: + if not has_valid_times(result): + raise ValueError(f"Result with id {result[id]} has wrong time signatures.") + + +def full_integrity_check(path: Path) -> None: + check_db_integrity(path) + From 65cd55ec0a8d2afbe5a54159cff393d80da466bd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 16:36:31 +0200 Subject: [PATCH 099/111] add test on whether paths 
are indeed unique --- corrlib/integrity.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index db242f6..70e4694 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -15,6 +15,14 @@ def has_valid_times(result: pd.DataFrame) -> bool: return False return True +def are_keys_unique(db: Path, table: str, col: str) -> bool: + conn = sqlite3.connect(db) + c = conn.cursor() + c.execute(f"SELECT COUNT( DISTINCT CAST(path AS nvarchar(4000))), COUNT({col}) FROM {table};") + results = c.fetchall()[0] + conn.close() + return bool(results[0] == results[1]) + def check_db_integrity(path: Path) -> None: db = get_db_file(path) @@ -27,6 +35,7 @@ def check_db_integrity(path: Path) -> None: raise ValueError(f"Result with id {result[id]} has wrong time signatures.") + def full_integrity_check(path: Path) -> None: check_db_integrity(path) From 85698c377bca7405d69c63d13d3ef918d35aaf1a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 16:42:39 +0200 Subject: [PATCH 100/111] use uniqueness for complete db check --- corrlib/integrity.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 70e4694..8a414bf 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -26,6 +26,10 @@ def are_keys_unique(db: Path, table: str, col: str) -> bool: def check_db_integrity(path: Path) -> None: db = get_db_file(path) + + if not are_keys_unique(db, 'backlogs', 'path'): + raise Exception("The paths the backlog table of the database links are not unique.") + search_expr = "SELECT * FROM 'backlogs'" conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) From d8bb9e4080017070bc928d19fadd7e175abeebf0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 10:49:03 +0200 Subject: [PATCH 101/111] fix import --- corrlib/find.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 
1c985e2..7b07321 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,8 +6,8 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get -from .integrity import check_time_validity -from typing import Any, Optional, Union +from .integrity import has_valid_times +from typing import Any, Optional from pathlib import Path import datetime as dt from collections.abc import Callable @@ -88,7 +88,7 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) - db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) + db_times_valid = has_valid_times(result) if not db_times_valid: raise ValueError('Time stamps not valid for result with path', result["path"]) From dc424c3e18ecdeeda834865b05dc9bfac6e41e5a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 11:24:25 +0200 Subject: [PATCH 102/111] fix time tests --- tests/find_test.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index f512f15..cc455f9 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -5,6 +5,7 @@ import corrlib.initialization as cinit import pytest import pandas as pd import datalad.api as dl +import datetime as dt def make_sql(path: Path) -> Path: @@ -57,18 +58,20 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf0", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created 
- record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf1", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-04-14 12:55:18.229966'] # created and updated later + record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf3", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf4", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf5", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', 
'2024-03-26 12:55:18.229966'] # this is invalid... + record_G = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', str(dt.datetime.now() + dt.timedelta(days=2, hours=3, minutes=5, seconds=30))] # created and updated later data = [record_A, record_B, record_C, record_D, record_E] cols = ["name", @@ -141,6 +144,21 @@ def test_time_filter() -> None: with pytest.raises(ValueError): results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + data = [record_A, record_B, record_C, record_D, record_G] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.raises(ValueError): + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + def test_db_lookup(tmp_path: Path) -> None: db = make_sql(tmp_path) From b625bf92438ba3fcae0729bddd57554f68275fdd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 12:02:03 +0200 Subject: [PATCH 103/111] proper row iteration --- corrlib/integrity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 8a414bf..d865944 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -5,7 +5,7 @@ import pandas as pd import sqlite3 -def has_valid_times(result: pd.DataFrame) -> bool: +def has_valid_times(result: pd.Series) -> bool: # we expect created_at <= updated_at <= now created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) @@ -34,7 +34,7 @@ def check_db_integrity(path: Path) -> None: conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) - for result in results: + for _, result in results.iterrows(): if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has
wrong time signatures.") From 0b1ff3cbad724456e6c77a5a52dd521bb6ac5ffe Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 16:24:31 +0200 Subject: [PATCH 104/111] prepare implementation --- corrlib/cli.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/corrlib/cli.py b/corrlib/cli.py index 6c1c3c5..2d1a9ee 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -1,6 +1,7 @@ from typing import Optional import typer from corrlib import __app_name__ + from .initialization import create from .toml import import_tomls, update_project, reimport_project from .find import find_record, list_projects @@ -8,6 +9,8 @@ from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record +# from .integrity import + import os from pyerrors import Corr from importlib.metadata import version @@ -137,6 +140,23 @@ def stat( return +@app.command() +def check(path: Path = typer.Option( + Path('./corrlib'), + "--dataset", + "-d", + ), + files: str = typer.Argument( + ), + copy_file: bool = typer.Option( + bool(True), + "--save", + "-s", + ),) -> None: + + "✅" : "❌" + + @app.command() def importer( path: Path = typer.Option( From 23b5d066f7b4e4733629b07786fd1ffbb117efcd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 16:34:30 +0200 Subject: [PATCH 105/111] make integrity checks accessible from cli --- corrlib/cli.py | 13 +++---------- corrlib/integrity.py | 8 +++++--- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 2d1a9ee..d24d8ef 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -9,7 +9,7 @@ from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record -# from .integrity import +from .integrity import full_integrity_check import os from pyerrors import Corr @@ -146,15 +146,8 @@ def
check(path: Path = typer.Option( "--dataset", "-d", ), - files: str = typer.Argument( - ), - copy_file: bool = typer.Option( - bool(True), - "--save", - "-s", - ),) -> None: - - "✅" : "❌" + ) -> None: + full_integrity_check(path) @app.command() diff --git a/corrlib/integrity.py b/corrlib/integrity.py index d865944..dc1216c 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -27,19 +27,21 @@ def are_keys_unique(db: Path, table: str, col: str) -> bool: def check_db_integrity(path: Path) -> None: db = get_db_file(path) - if not are_keys_unique(db, 'backlogs', 'path'): + if not are_keys_unique(path / db, 'backlogs', 'path'): raise Exception("The paths the backlog table of the database links are not unique.") search_expr = "SELECT * FROM 'backlogs'" - conn = sqlite3.connect(db) + conn = sqlite3.connect(path / db) results = pd.read_sql(search_expr, conn) for _, result in results.iterrows(): if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time signatures.") - + print("DB:\t✅") def full_integrity_check(path: Path) -> None: check_db_integrity(path) + print("Full:\t✅") + From b13136a248f294eb2275da54747cc105473655f3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:32:22 +0200 Subject: [PATCH 106/111] add check for links to files --- corrlib/integrity.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index dc1216c..63572a9 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -3,6 +3,8 @@ from pathlib import Path from .tools import get_db_file import pandas as pd import sqlite3 +from .tracker import get +import pyerrors.input.json as pj def has_valid_times(result: pd.Series) -> bool: @@ -38,10 +40,45 @@ def check_db_integrity(path: Path) -> None: if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time signatures.") print("DB:\t✅") + return + + +def 
_check_db2paths(path: Path, meas_paths: list[str]) -> None: + needed_data: dict[str, list[str]] = {} + for mpath in meas_paths: + file = mpath.split("::")[0] + if file not in needed_data.keys(): + needed_data[file] = [] + key = mpath.split("::")[1] + needed_data[file].append(key) + + for file in needed_data.keys(): + get(path, Path(file)) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) + if not set(filedict.keys()).issubset(needed_data[file]): + for key in filedict.keys(): + if key not in needed_data[file]: + raise ValueError(f"Found unintended key {key} in file {file}.") + elif not set(needed_data[file]).issubset(filedict.keys()): + for key in needed_data[file]: + if key not in filedict.keys(): + raise ValueError(f"Did not find data for key {key} that should be in file {file}.") + print("Links:\t✅") + return + + +def check_db_file_links(path: Path) -> None: + db = get_db_file(path) + search_expr = "SELECT path FROM 'backlogs'" + conn = sqlite3.connect(path / db) + results = pd.read_sql(search_expr, conn)['path'].values + print(results) + _check_db2paths(path, results) def full_integrity_check(path: Path) -> None: check_db_integrity(path) + check_db_file_links(path) print("Full:\t✅") From 29ebafc1c44e5e0fe30dcb38fdf22d408f746cec Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:34:53 +0200 Subject: [PATCH 107/111] show progress a little --- corrlib/integrity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 63572a9..8722840 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -52,7 +52,9 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None: key = mpath.split("::")[1] needed_data[file].append(key) - for file in needed_data.keys(): + totf = len(needed_data.keys()) + for i, file in enumerate(needed_data.keys()): + print(f"Check against file {i}/{totf}: {file}") get(path, Path(file)) filedict: dict[str, Any] = 
pj.load_json_dict(str(path / file)) if not set(filedict.keys()).issubset(needed_data[file]): From 37ae8185897b62017f5822ec0b727685a905a389 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:37:46 +0200 Subject: [PATCH 108/111] small logic issue --- corrlib/integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 8722840..23fbe0e 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -61,7 +61,7 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None: for key in filedict.keys(): if key not in needed_data[file]: raise ValueError(f"Found unintended key {key} in file {file}.") - elif not set(needed_data[file]).issubset(filedict.keys()): + if not set(needed_data[file]).issubset(filedict.keys()): for key in needed_data[file]: if key not in filedict.keys(): raise ValueError(f"Did not find data for key {key} that should be in file {file}.") From 0535e19bf08ebced63834ff5796591a815ae20f0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:42:47 +0200 Subject: [PATCH 109/111] fix typing --- corrlib/integrity.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 23fbe0e..5f80aa3 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -6,6 +6,8 @@ import sqlite3 from .tracker import get import pyerrors.input.json as pj +from typing import Any + def has_valid_times(result: pd.Series) -> bool: # we expect created_at <= updated_at <= now @@ -74,8 +76,7 @@ def check_db_file_links(path: Path) -> None: search_expr = "SELECT path FROM 'backlogs'" conn = sqlite3.connect(path / db) results = pd.read_sql(search_expr, conn)['path'].values - print(results) - _check_db2paths(path, results) + _check_db2paths(path, list(results)) def full_integrity_check(path: Path) -> None: From 083d7ee3ce6ec2868a2da1b9c4fd73ef22362f50 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 
2026 17:52:18 +0200 Subject: [PATCH 110/111] add dry run for loading data using the integrity functions --- corrlib/meas_io.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index f4e8a83..731da66 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -11,6 +11,7 @@ from .tracker import get, save, unlock import shutil from typing import Any from pathlib import Path +from .integrity import _check_db2paths CACHE_DIR = ".cache" @@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]: return load_records(path, [meas_path])[0] -def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: +def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. @@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = Path of the correlator library. meas_paths: list[str] A list of the paths to the correlator in the backlog system. - perloaded: dict[str, Any] - The data that is already prelaoded. Of interest if data has alread been loaded in the same script. + preloaded: dict[str, Any] + The data that is already preloaded. Of interest if data has already been loaded in the same script. + dry_run: bool + Do not load data, just check whether we can reach the data we are interested in. Returns ------- - retruned_data: list + returned_data: list The loaded records. 
""" + if dry_run: + _check_db2paths(path, meas_paths) + return needed_data: dict[str, list[str]] = {} for mpath in meas_paths: file = mpath.split("::")[0] From 51ae53aa024365f47436c11bf69bc376184ac6b4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:53:13 +0200 Subject: [PATCH 111/111] add empty return --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 731da66..cbd9386 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -176,7 +176,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = """ if dry_run: _check_db2paths(path, meas_paths) - return + return [] needed_data: dict[str, list[str]] = {} for mpath in meas_paths: file = mpath.split("::")[0]