From 0d6ad8f552483c49a1afa025fa17f53eefcba39e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 10:17:15 +0100 Subject: [PATCH 01/92] add a simple method to show the statistics of a record --- corrlib/cli.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 414fcc4..f205026 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -7,6 +7,7 @@ from .find import find_record, list_projects from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache +from .meas_io import load_record as mio_load_record import os from importlib.metadata import version @@ -35,6 +36,7 @@ def update( update_project(path, uuid) return + @app.command() def list( path: str = typer.Option( @@ -94,12 +96,39 @@ def find( ensemble: str = typer.Argument(), corr: str = typer.Argument(), code: str = typer.Argument(), + arg: str = typer.Option( + str('all'), + "--argument", + "-a", + ), ) -> None: """ Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. """ results = find_record(path, ensemble, corr, code) - print(results) + if arg == 'all': + print(results) + else: + for r in results[arg].values: + print(r) + + +@app.command() +def stat( + path: str = typer.Option( + str('./corrlib'), + "--dataset", + "-d", + ), + record: str = typer.Argument(), + ) -> None: + """ + Show the statistics of a given record. 
+ """ + record = mio_load_record(path, record)[0] + statistics = record.idl + print(statistics) + return @app.command() From 60b56dfb25db30322b973684e42fbbc07993772d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Feb 2026 16:54:14 +0100 Subject: [PATCH 02/92] fix the file finder for sfcf --- corrlib/input/sfcf.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 6a75b72..621f736 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -3,6 +3,7 @@ import datalad.api as dl import json import os from typing import Any +from fnmatch import fnmatch bi_corrs: list[str] = ["f_P", "fP", "f_p", @@ -298,9 +299,10 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: if not appended: compact = (version[-1] == "c") for i, item in enumerate(ls): - rep_path = directory + '/' + item - sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) - files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) + if fnmatch(item, prefix + "*"): + rep_path = directory + '/' + item + sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) + files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) print("Getting data, this might take a while...") From 875d7b9461ef9b763853e435c24d1fe2ed3d036c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Feb 2026 09:45:00 +0100 Subject: [PATCH 03/92] write explicit setup-uv link --- .github/workflows/mypy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 791243f..a75fa57 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -20,7 +20,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: https://github.com/astral-sh/setup-uv@v7 with: python-version: ${{ matrix.python-version }} enable-cache: true From 
540160c51f4c998341b6441f984d8e7b0bfdc2fd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Feb 2026 09:14:12 +0100 Subject: [PATCH 04/92] use older setup-uv action --- .github/workflows/mypy.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index a75fa57..0add4e9 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -14,13 +14,13 @@ jobs: - name: Install git-annex run: | sudo apt-get update - sudo apt-get install -y git-annex + sudo apt-get install -y git-annex - name: Check out the repository uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v7 + uses: https://github.com/astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 373f3476c070c0ff116f5a29f51c068a099a36f2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Feb 2026 09:18:51 +0100 Subject: [PATCH 05/92] explicit install-uv version --- .github/workflows/mypy.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 0add4e9..c2a36c4 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -20,7 +20,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v6.8.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From b2ac8939a33087260a74d375b97a040d8c40640f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Feb 2026 11:20:28 +0100 Subject: [PATCH 06/92] fix: cli show stat failed for single values --- corrlib/cli.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index f205026..5ceb5e3 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -9,6 +9,7 @@ from .main import 
update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record import os +from pyerrors import Corr from importlib.metadata import version @@ -120,12 +121,14 @@ def stat( "--dataset", "-d", ), - record: str = typer.Argument(), + record_id: str = typer.Argument(), ) -> None: """ Show the statistics of a given record. """ - record = mio_load_record(path, record)[0] + record = mio_load_record(path, record_id) + if isinstance(record, Corr): + record = record[0] statistics = record.idl print(statistics) return From b51a69bc69b4d634e43adcf86f54fcc3e5201a98 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 6 Mar 2026 15:35:49 +0100 Subject: [PATCH 07/92] fix file unlock --- corrlib/tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/tracker.py b/corrlib/tracker.py index 5cc281c..e535b03 100644 --- a/corrlib/tracker.py +++ b/corrlib/tracker.py @@ -114,7 +114,7 @@ def unlock(path: str, file: str) -> None: """ tracker = get_tracker(path) if tracker == 'datalad': - dl.unlock(file, dataset=path) + dl.unlock(os.path.join(path, file), dataset=path) elif tracker == 'None': Warning("Tracker 'None' does not implement unlock.") pass From 6b2db911bf7ee11e19f442e425a9ee48c28e3969 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:56:27 +0100 Subject: [PATCH 08/92] add list for stat types --- corrlib/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 5ceb5e3..4e1b65e 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -39,7 +39,7 @@ def update( @app.command() -def list( +def lister( path: str = typer.Option( str('./corrlib'), "--dataset", @@ -127,7 +127,7 @@ def stat( Show the statistics of a given record. 
""" record = mio_load_record(path, record_id) - if isinstance(record, Corr): + if isinstance(record, (list, Corr)): record = record[0] statistics = record.idl print(statistics) From a9cc2b3f48199877eedd40f87db27dffe0a73251 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:57:48 +0100 Subject: [PATCH 09/92] fix write measurement call and reporting to user --- corrlib/toml.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index 629a499..feafaf6 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -189,7 +189,6 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: measurement = sfcf.read_data(path, uuid, md['path'], md['prefix'], param, version=md['version'], cfg_seperator=md['cfg_seperator'], sep='/') - print(mname + " imported.") elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': param = openQCD.read_ms1_param(path, uuid, md['param_file']) @@ -211,8 +210,8 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: param['type'] = 't1' measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) - - write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else '')) + write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) + print(mname + " imported.") if not os.path.exists(os.path.join(path, "toml_imports", uuid)): os.makedirs(os.path.join(path, "toml_imports", uuid)) From 96731baeb9e9db0c5002c1cdbc3ca1dfb75ca52b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 12:59:04 +0100 Subject: [PATCH 10/92] fix when files are unlocked or saved --- corrlib/meas_io.py | 17 +++++++++++------ 1 file 
changed, 11 insertions(+), 6 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 65a0569..300adc3 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -34,22 +34,28 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, """ db_file = get_db_file(path) db = os.path.join(path, db_file) + + files_to_save = [] + get(path, db_file) unlock(path, db_file) + files_to_save.append(db_file) + conn = sqlite3.connect(db) c = conn.cursor() - files = [] for corr in measurement.keys(): file_in_archive = os.path.join('.', 'archive', ensemble, corr, uuid + '.json.gz') file = os.path.join(path, file_in_archive) - files.append(file) known_meas = {} if not os.path.exists(os.path.join(path, '.', 'archive', ensemble, corr)): os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr)) + files_to_save.append(file_in_archive) else: if os.path.exists(file): - unlock(path, file_in_archive) - known_meas = pj.load_json_dict(file) + if file not in files_to_save: + unlock(path, file_in_archive) + files_to_save.append(file_in_archive) + known_meas = pj.load_json_dict(file, verbose=False) if code == "sfcf": parameters = sfcf.read_param(path, uuid, parameter_file) pars = {} @@ -98,9 +104,8 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() pj.dump_dict_to_json(known_meas, file) - files.append(os.path.join(path, db_file)) conn.close() - save(path, message="Add measurements to database", files=files) + save(path, message="Add measurements to database", files=files_to_save) return From 52f6b0f53c558ad86635111502bee39a525d50fc Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 13:00:30 +0100 Subject: [PATCH 11/92] silence readers --- corrlib/input/sfcf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 621f736..8b6e1a3 100644 --- 
a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -320,10 +320,10 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: if not param['crr'] == []: if names is not None: data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])), - range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, names=names) + range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, silent=True, names=names) else: data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])), - range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True) + range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, silent=True) for key in data_crr.keys(): data[key] = data_crr[key] From 54b42040a9144033ca9b7aec7d7edfe38057e0e6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:07:54 +0100 Subject: [PATCH 12/92] use v6 of astral action --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 1fcb8fe..c82f6d1 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 0e0153bd1d6a96f18b81cfc20398d12c5466c831 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:23:49 +0100 Subject: [PATCH 13/92] update uv setup after runner upate --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml 
index c82f6d1..0ae798d 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v7.6.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From ca2eb081bb63c3d682d79a11026d6d2a75aad98e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:29:34 +0100 Subject: [PATCH 14/92] older version again --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 0ae798d..6552286 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v7.6.0 + uses: https://github.com/astral-sh/setup-uv@v6 with: python-version: ${{ matrix.python-version }} enable-cache: true From 67a9e4ea4b3d15036c1455a62e60c68a95fcd3ce Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:31:03 +0100 Subject: [PATCH 15/92] use 6.4.0 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 6552286..82e7484 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,7 +29,7 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6 + uses: https://github.com/astral-sh/setup-uv@v6.4.0 with: python-version: ${{ matrix.python-version }} enable-cache: true From 53067f7c476be759230001949689c8bd04833b86 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:47:02 +0100 Subject: [PATCH 16/92] use v5 --- .github/workflows/pytest.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml 
index 82e7484..9c82795 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,10 +29,9 @@ jobs: with: show-progress: true - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6.4.0 + uses: https://github.com/astral-sh/setup-uv@v5 with: python-version: ${{ matrix.python-version }} - enable-cache: true - name: Install corrlib run: uv sync --locked --all-extras --dev --python ${{ matrix.python-version }} - name: Run tests From 4a821006ed3a38574f3f188f35d55bd5862aa558 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:49:35 +0100 Subject: [PATCH 17/92] add setup python --- .github/workflows/pytest.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 9c82795..cd5c0c9 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -28,6 +28,10 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 with: From f05caf572dca93d8f5b0758b9b5837716146500b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:52:57 +0100 Subject: [PATCH 18/92] roll out changes --- .github/workflows/mypy.yaml | 7 ++++--- .github/workflows/pytest.yaml | 4 +--- .github/workflows/ruff.yaml | 8 +++++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index c2a36c4..b8ab802 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -19,11 +19,12 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - - name: Install uv - uses: https://github.com/astral-sh/setup-uv@v6.8.0 + - name: Setup python + uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - 
enable-cache: true + - name: Install uv + uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib run: uv sync --locked --all-extras --dev --python "3.12" - name: Run tests diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index cd5c0c9..af3b667 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,13 +29,11 @@ jobs: with: show-progress: true - name: Setup python - uses: actions/setup-python@v5 + uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - with: - python-version: ${{ matrix.python-version }} - name: Install corrlib run: uv sync --locked --all-extras --dev --python ${{ matrix.python-version }} - name: Run tests diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 4de4b0b..778743b 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -20,10 +20,12 @@ jobs: uses: https://github.com/RouxAntoine/checkout@v4.1.8 with: show-progress: true - - name: Install uv - uses: astral-sh/setup-uv@v7 + - name: Setup python + uses: https://github.com/actions/setup-python@v5 with: - enable-cache: true + python-version: ${{ matrix.python-version }} + - name: Install uv + uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib run: uv sync --locked --all-extras --dev --python "3.12" - name: Run tests From 0c01d18ecbfac610ef650659f9ebbcf8d352c605 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:56:31 +0100 Subject: [PATCH 19/92] use python 3.12 for mypy and ruff --- .github/workflows/mypy.yaml | 2 +- .github/workflows/ruff.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index b8ab802..4781688 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -22,7 +22,7 @@ jobs: - name: Setup python uses: 
https://github.com/actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: "3.12" - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index 778743b..e0db1b0 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -23,7 +23,7 @@ jobs: - name: Setup python uses: https://github.com/actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: "3.12" - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib From 4853c0e414959973721ef0d5a849f6214e950502 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:58:33 +0100 Subject: [PATCH 20/92] fix type error for now --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 300adc3..2a8c986 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union(Any, None)) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
From 7ce9742ed562c0f9924ca1ea710c85f7bcec9eaa Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 15:59:45 +0100 Subject: [PATCH 21/92] fix invalid escape in docs --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index 21063ec..022a3f5 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -119,7 +119,7 @@ def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: - """ + r""" Filter method for the Database entries holding SFCF calculations. Parameters From d302ae7e0d40397ba90bef4015a7c0f718237031 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:01:59 +0100 Subject: [PATCH 22/92] fix typo in type annotations --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 2a8c986..2f08052 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union(Any, None)) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[Any, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
From bd581c6c126bc7ada966e754b2a25cc84fb941b2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:07:57 +0100 Subject: [PATCH 23/92] set up git --- .github/workflows/pytest.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index af3b667..83e88ae 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -20,6 +20,10 @@ jobs: env: UV_CACHE_DIR: /tmp/.uv-cache steps: + - name: Setup git + run: | + git config --global user.email "tester@example.com" + git config --global user.name "Tester" - name: Install git-annex run: | sudo apt-get update From c6f3603fbf36f93d9201fc538de3255c505a0629 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:12:57 +0100 Subject: [PATCH 24/92] Throw errors when parmeter file is not set --- corrlib/meas_io.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 2f08052..8e5855d 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -12,7 +12,7 @@ import shutil from typing import Any -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[Any, None]) -> None: +def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. 
@@ -57,7 +57,10 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, files_to_save.append(file_in_archive) known_meas = pj.load_json_dict(file, verbose=False) if code == "sfcf": - parameters = sfcf.read_param(path, uuid, parameter_file) + if parameter_file is not None: + parameters = sfcf.read_param(path, uuid, parameter_file) + else: + raise Exception("Need parameter file for this code!") pars = {} subkeys = list(measurement[corr].keys()) for subkey in subkeys: @@ -66,7 +69,10 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, elif code == "openQCD": ms_type = list(measurement.keys())[0] if ms_type == 'ms1': - parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + if parameter_file is not None: + parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + else: + raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): From 6cfa51f878c358730effaa58174daba7fe290818 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:42:55 +0100 Subject: [PATCH 25/92] setup local cache --- .github/workflows/mypy.yaml | 5 ++++- .github/workflows/pytest.yaml | 6 +++++- .github/workflows/ruff.yaml | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 4781688..8e276d4 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -9,7 +9,10 @@ jobs: mypy: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 83e88ae..3411646 100644 --- a/.github/workflows/pytest.yaml +++ 
b/.github/workflows/pytest.yaml @@ -18,7 +18,10 @@ jobs: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Setup git run: | @@ -36,6 +39,7 @@ jobs: uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: 'pip' - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index e0db1b0..db42edb 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -10,7 +10,10 @@ jobs: runs-on: ubuntu-latest env: - UV_CACHE_DIR: /tmp/.uv-cache + UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache + RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache + AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir + RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | From 94b677262a239f483f673fae2f7abf3e0f3e707c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:45:01 +0100 Subject: [PATCH 26/92] remove cache envs --- .github/workflows/mypy.yaml | 3 --- .github/workflows/pytest.yaml | 3 --- .github/workflows/ruff.yaml | 3 --- 3 files changed, 9 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index 8e276d4..fdb5bee 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -10,9 +10,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 3411646..286afbe 100644 --- 
a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -19,9 +19,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Setup git run: | diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index db42edb..a1cb972 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -11,9 +11,6 @@ jobs: runs-on: ubuntu-latest env: UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache - RUNNER_TOOL_CACHE: ${HOME}/.runner_cache/tool_cache - AGENT_TOOLSDIRECTORY: ${HOME}/.runner_cache/tool_dir - RUN_TOOL_CACHE: ${HOME}/.runner_cache/run_tool_cache steps: - name: Install git-annex run: | From c2296f00ee84eef8fabe20d99aae52c890736ea4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 16:47:36 +0100 Subject: [PATCH 27/92] remove uv cache --- .github/workflows/mypy.yaml | 2 -- .github/workflows/pytest.yaml | 2 -- .github/workflows/ruff.yaml | 2 -- 3 files changed, 6 deletions(-) diff --git a/.github/workflows/mypy.yaml b/.github/workflows/mypy.yaml index fdb5bee..fbd51ec 100644 --- a/.github/workflows/mypy.yaml +++ b/.github/workflows/mypy.yaml @@ -8,8 +8,6 @@ on: jobs: mypy: runs-on: ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Install git-annex run: | diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 286afbe..da44258 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -17,8 +17,6 @@ jobs: - "3.14" runs-on: ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Setup git run: | diff --git a/.github/workflows/ruff.yaml b/.github/workflows/ruff.yaml index a1cb972..1da1225 100644 --- a/.github/workflows/ruff.yaml +++ b/.github/workflows/ruff.yaml @@ -9,8 +9,6 @@ jobs: ruff: runs-on: 
ubuntu-latest - env: - UV_CACHE_DIR: ${HOME}/.runner_cache/uv_cache steps: - name: Install git-annex run: | From a5d6b978ea5d4b642828f220cd2c2b07772af089 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 17:25:44 +0100 Subject: [PATCH 28/92] remove pip cache --- .github/workflows/pytest.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index da44258..b1a4d94 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -34,7 +34,6 @@ jobs: uses: https://github.com/actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' - name: Install uv uses: https://github.com/astral-sh/setup-uv@v5 - name: Install corrlib From b65ee83698df8f8f292670e4aea4565f0595150d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 20 Mar 2026 23:37:40 +0100 Subject: [PATCH 29/92] fix list test --- tests/cli_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/cli_test.py b/tests/cli_test.py index a6b0bd7..d4a4045 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -37,7 +37,7 @@ def test_init_db(tmp_path): table_names = [table[0] for table in tables] for expected_table in expected_tables: assert expected_table in table_names - + cursor.execute("SELECT * FROM projects;") projects = cursor.fetchall() assert len(projects) == 0 @@ -60,7 +60,7 @@ def test_init_db(tmp_path): project_column_names = [col[1] for col in project_columns] for expected_col in expected_project_columns: assert expected_col in project_column_names - + cursor.execute("PRAGMA table_info('backlogs');") backlog_columns = cursor.fetchall() expected_backlog_columns = [ @@ -85,7 +85,7 @@ def test_list(tmp_path): dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 - result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "ensembles"]) + result = 
runner.invoke(app, ["lister", "--dataset", str(dataset_path), "ensembles"]) assert result.exit_code == 0 - result = runner.invoke(app, ["list", "--dataset", str(dataset_path), "projects"]) + result = runner.invoke(app, ["lister", "--dataset", str(dataset_path), "projects"]) assert result.exit_code == 0 From 776e4a3d8d131d3f724eae1d266ea3df374d3340 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 12:59:33 +0100 Subject: [PATCH 30/92] add further tests for tools --- tests/tools_test.py | 62 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/tests/tools_test.py b/tests/tools_test.py index ee76f1c..60a5a4a 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,31 +1,79 @@ - - from corrlib import tools as tl +from configparser import ConfigParser +import os -def test_m2k(): +def test_m2k() -> None: for m in [0.1, 0.5, 1.0]: expected_k = 1 / (2 * m + 8) assert tl.m2k(m) == expected_k -def test_k2m(): +def test_k2m() -> None: for m in [0.1, 0.5, 1.0]: assert tl.k2m(m) == (1/(2*m))-4 -def test_k2m_m2k(): +def test_k2m_m2k() -> None: for m in [0.1, 0.5, 1.0]: k = tl.m2k(m) m_converted = tl.k2m(k) assert abs(m - m_converted) < 1e-9 -def test_str2list(): +def test_str2list() -> None: assert tl.str2list("a,b,c") == ["a", "b", "c"] assert tl.str2list("1,2,3") == ["1", "2", "3"] -def test_list2str(): +def test_list2str() -> None: assert tl.list2str(["a", "b", "c"]) == "a,b,c" assert tl.list2str(["1", "2", "3"]) == "1,2,3" + + +def test_set_config(tmp_path: str) -> None: + section = "core" + option = "test_option" + value = "test_value" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option', fallback="not the value") == "test_value" + # now, a config file is already present + section = "core" + option = "test_option2" + 
value = "test_value2" + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option2', fallback="not the value") == "test_value2" + # update option 2 + section = "core" + option = "test_option2" + value = "test_value3" + tl.set_config(tmp_path, section, option, value) + config_path = os.path.join(tmp_path, '.corrlib') + config = ConfigParser() + config.read(config_path) + assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" + + +def test_get_db_file(tmp_path: str) -> None: + section = "paths" + option = "db" + value = "test_value" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + assert tl.get_db_file(tmp_path) == "test_value" + + +def test_cache_enabled(tmp_path: str) -> None: + section = "core" + option = "cached" + value = "True" + # config is not yet available + tl.set_config(tmp_path, section, option, value) + assert tl.get_db_file(tmp_path) From 7e76966d5f7ba9d4b7cfcd6e95e5988c1de21c35 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 12:59:59 +0100 Subject: [PATCH 31/92] replace config file name with var --- corrlib/tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 118b094..26cbf0a 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -88,7 +88,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: value: Any The value we set the option to. 
""" - config_path = os.path.join(path, '.corrlib') + config_path = os.path.join(path, CONFIG_FILENAME) config = ConfigParser() if os.path.exists(config_path): config.read(config_path) From 6bb48f151c32ef09bc5119dbbc7aedc0816a794e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:01:58 +0100 Subject: [PATCH 32/92] add types --- tests/cli_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/cli_test.py b/tests/cli_test.py index d4a4045..cba0a10 100644 --- a/tests/cli_test.py +++ b/tests/cli_test.py @@ -2,18 +2,19 @@ from typer.testing import CliRunner from corrlib.cli import app import os import sqlite3 as sql +from pathlib import Path runner = CliRunner() -def test_version(): +def test_version() -> None: result = runner.invoke(app, ["--version"]) assert result.exit_code == 0 assert "corrlib" in result.output -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 @@ -21,7 +22,7 @@ def test_init_folders(tmp_path): assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 @@ -81,7 +82,7 @@ def test_init_db(tmp_path): assert expected_col in backlog_column_names -def test_list(tmp_path): +def test_list(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) assert result.exit_code == 0 From 92f307b83ac794f181d4a855f922f3fa5f9532c8 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:05:33 +0100 Subject: [PATCH 33/92] use Path in type annotations --- tests/import_project_test.py | 2 +- tests/sfcf_in_test.py | 2 +- tests/test_initialization.py 
| 9 +++++---- tests/tools_test.py | 7 ++++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/import_project_test.py b/tests/import_project_test.py index 2dea06f..685d2cf 100644 --- a/tests/import_project_test.py +++ b/tests/import_project_test.py @@ -1,7 +1,7 @@ import corrlib.toml as t -def test_toml_check_measurement_data(): +def test_toml_check_measurement_data() -> None: measurements = { "a": { diff --git a/tests/sfcf_in_test.py b/tests/sfcf_in_test.py index 72921e7..5e4ff83 100644 --- a/tests/sfcf_in_test.py +++ b/tests/sfcf_in_test.py @@ -1,7 +1,7 @@ import corrlib.input.sfcf as input import json -def test_get_specs(): +def test_get_specs() -> None: parameters = { 'crr': [ 'f_P', 'f_A' diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 1ea0ece..9284c82 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -1,22 +1,23 @@ import corrlib.initialization as init import os import sqlite3 as sql +from pathlib import Path -def test_init_folders(tmp_path): +def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path)) assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_folders_no_tracker(tmp_path): +def test_init_folders_no_tracker(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) -def test_init_config(tmp_path): +def test_init_config(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" init.create(str(dataset_path), tracker="None") config_path = dataset_path / ".corrlib" @@ -34,7 +35,7 @@ def test_init_config(tmp_path): assert config.get("paths", "import_scripts_path") == "import_scripts" -def test_init_db(tmp_path): +def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" 
init.create(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) diff --git a/tests/tools_test.py b/tests/tools_test.py index 60a5a4a..0399be0 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,6 +1,7 @@ from corrlib import tools as tl from configparser import ConfigParser import os +from pathlib import Path def test_m2k() -> None: @@ -31,7 +32,7 @@ def test_list2str() -> None: assert tl.list2str(["1", "2", "3"]) == "1,2,3" -def test_set_config(tmp_path: str) -> None: +def test_set_config(tmp_path: Path) -> None: section = "core" option = "test_option" value = "test_value" @@ -61,7 +62,7 @@ def test_set_config(tmp_path: Path) -> None: assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" -def test_get_db_file(tmp_path: str) -> None: +def test_get_db_file(tmp_path: Path) -> None: section = "paths" option = "db" value = "test_value" @@ -70,7 +71,7 @@ def test_get_db_file(tmp_path: Path) -> None: assert tl.get_db_file(tmp_path) == "test_value" -def test_cache_enabled(tmp_path: str) -> None: +def test_cache_enabled(tmp_path: Path) -> None: section = "core" option = "cached" value = "True" From 97e30fa27d5eed7db5133899ba9efafeef7c7c6d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 13:06:12 +0100 Subject: [PATCH 34/92] use Path in type annotations part 2 --- corrlib/tools.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 26cbf0a..72112c5 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,6 +1,7 @@ import os from configparser import ConfigParser from typing import Any +from pathlib import Path CONFIG_FILENAME = ".corrlib" cached: bool = True @@ -73,7 +74,7 @@ def k2m(k: float) -> float: return (1/(2*k))-4 -def set_config(path: str, section: str, option: str, value: Any) -> None: +def set_config(path: Path, section: str, option: str, value: Any) -> None: """ Set configuration parameters for the library. 
@@ -100,7 +101,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None: return -def get_db_file(path: str) -> str: +def get_db_file(path: Path) -> str: """ Get the database file associated with the library at the given path. @@ -122,7 +123,7 @@ def get_db_file(path: str) -> str: return db_file -def cache_enabled(path: str) -> bool: +def cache_enabled(path: Path) -> bool: """ Check, whether the library is cached. Fallback is true. From 110ddaf3a1ad141c210c0d1d23ab85b85a930af7 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 15:21:34 +0100 Subject: [PATCH 35/92] add error messages --- corrlib/tools.py | 6 ++++++ tests/tools_test.py | 11 ++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 72112c5..727ed30 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -119,6 +119,8 @@ def get_db_file(path: Path) -> str: config = ConfigParser() if os.path.exists(config_path): config.read(config_path) + else: + raise FileNotFoundError("Configuration file not found.") db_file = config.get('paths', 'db', fallback='backlogger.db') return db_file @@ -142,6 +144,10 @@ def cache_enabled(path: Path) -> bool: config = ConfigParser() if os.path.exists(config_path): config.read(config_path) + else: + raise FileNotFoundError("Configuration file not found.") cached_str = config.get('core', 'cached', fallback='True') + if cached_str not in ['True', 'False']: + raise ValueError(f"String {cached_str} is not a valid option, only True and False are allowed!") cached_bool = cached_str == ('True') return cached_bool diff --git a/tests/tools_test.py b/tests/tools_test.py index 0399be0..9be88b4 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -2,6 +2,7 @@ from corrlib import tools as tl from configparser import ConfigParser import os from pathlib import Path +import pytest def test_m2k() -> None: @@ -74,7 +75,11 @@ def test_get_db_file(tmp_path: Path) -> None: def 
test_cache_enabled(tmp_path: Path) -> None: section = "core" option = "cached" - value = "True" # config is not yet available - tl.set_config(tmp_path, section, option, value) - assert tl.get_db_file(tmp_path) + tl.set_config(tmp_path, section, option, "True") + assert tl.cache_enabled(tmp_path) + tl.set_config(tmp_path, section, option, "False") + assert not tl.cache_enabled(tmp_path) + tl.set_config(tmp_path, section, option, "lalala") + with pytest.raises(ValueError) as e_info: + tl.cache_enabled(tmp_path) From 8162758ceca1985ad6818cd803426ff0e5b207cf Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 16:15:55 +0100 Subject: [PATCH 36/92] use pathlib.Path for directories and files --- corrlib/cli.py | 41 ++++++++++++------------ corrlib/find.py | 17 +++++----- corrlib/git_tools.py | 19 ++++++------ corrlib/initialization.py | 23 +++++++------- corrlib/input/openQCD.py | 11 ++++--- corrlib/input/sfcf.py | 7 +++-- corrlib/main.py | 24 +++++++-------- corrlib/meas_io.py | 60 +++++++++++++++++++----------------- corrlib/toml.py | 23 +++++++------- corrlib/tools.py | 4 +-- corrlib/tracker.py | 17 +++++----- tests/test_initialization.py | 8 ++--- tests/tools_test.py | 8 ++--- 13 files changed, 137 insertions(+), 125 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 4e1b65e..b28692a 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -11,6 +11,7 @@ from .meas_io import load_record as mio_load_record import os from pyerrors import Corr from importlib.metadata import version +from pathlib import Path app = typer.Typer() @@ -24,8 +25,8 @@ def _version_callback(value: bool) -> None: @app.command() def update( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -40,8 +41,8 @@ def update( @app.command() def lister( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -52,8 +53,8 @@ def lister( """ if 
entities in ['ensembles', 'Ensembles','ENSEMBLES']: print("Ensembles:") - for item in os.listdir(path + "/archive"): - if os.path.isdir(os.path.join(path + "/archive", item)): + for item in os.listdir(path / "archive"): + if os.path.isdir(path / "archive" / item): print(item) elif entities == 'projects': results = list_projects(path) @@ -71,8 +72,8 @@ def lister( @app.command() def alias_add( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -89,8 +90,8 @@ def alias_add( @app.command() def find( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -116,8 +117,8 @@ def find( @app.command() def stat( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -136,8 +137,8 @@ def stat( @app.command() def importer( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -159,8 +160,8 @@ def importer( @app.command() def reimporter( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -183,8 +184,8 @@ def reimporter( @app.command() def init( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), @@ -203,8 +204,8 @@ def init( @app.command() def drop_cache( - path: str = typer.Option( - str('./corrlib'), + path: Path = typer.Option( + Path('./corrlib'), "--dataset", "-d", ), diff --git a/corrlib/find.py b/corrlib/find.py index 022a3f5..faef5db 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -7,9 +7,10 @@ from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get from typing import Any, Optional +from pathlib import Path -def _project_lookup_by_alias(db: str, alias: str) -> str: +def _project_lookup_by_alias(db: Path, alias: 
str) -> str: """ Lookup a projects UUID by its (human-readable) alias. @@ -37,7 +38,7 @@ def _project_lookup_by_alias(db: str, alias: str) -> str: return str(results[0][0]) -def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: +def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: """ Return the project information available in the database by UUID. @@ -61,7 +62,7 @@ def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]: return results -def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, +def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: """ Look up a correlator record in the database by the data given to the method. 
@@ -228,10 +229,10 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results.drop(drops) -def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, +def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) @@ -246,7 +247,7 @@ def find_record(path: str, ensemble: str, correlator_name: str, code: str, proje return results.reset_index() -def find_project(path: str, name: str) -> str: +def find_project(path: Path, name: str) -> str: """ Find a project by it's human readable name. @@ -264,10 +265,10 @@ def find_project(path: str, name: str) -> str: """ db_file = get_db_file(path) get(path, db_file) - return _project_lookup_by_alias(os.path.join(path, db_file), name) + return _project_lookup_by_alias(path / db_file, name) -def list_projects(path: str) -> list[tuple[str, str]]: +def list_projects(path: Path) -> list[tuple[str, str]]: """ List all projects known to the library. diff --git a/corrlib/git_tools.py b/corrlib/git_tools.py index c6e7522..d77f109 100644 --- a/corrlib/git_tools.py +++ b/corrlib/git_tools.py @@ -1,27 +1,28 @@ import os from .tracker import save import git +from pathlib import Path GITMODULES_FILE = '.gitmodules' -def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: +def move_submodule(repo_path: Path, old_path: Path, new_path: Path) -> None: """ Move a submodule to a new location. 
Parameters ---------- - repo_path: str + repo_path: Path Path to the repository. - old_path: str + old_path: Path The old path of the module. - new_path: str + new_path: Path The new path of the module. """ - os.rename(os.path.join(repo_path, old_path), os.path.join(repo_path, new_path)) + os.rename(repo_path / old_path, repo_path / new_path) - gitmodules_file_path = os.path.join(repo_path, GITMODULES_FILE) + gitmodules_file_path = repo_path / GITMODULES_FILE # update paths in .gitmodules with open(gitmodules_file_path, 'r') as file: @@ -29,8 +30,8 @@ def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: updated_lines = [] for line in lines: - if old_path in line: - line = line.replace(old_path, new_path) + if str(old_path) in line: + line = line.replace(str(old_path), str(new_path)) updated_lines.append(line) with open(gitmodules_file_path, 'w') as file: @@ -40,6 +41,6 @@ def move_submodule(repo_path: str, old_path: str, new_path: str) -> None: repo = git.Repo(repo_path) repo.git.add('.gitmodules') # save new state of the dataset - save(repo_path, message=f"Move module from {old_path} to {new_path}", files=['.gitmodules', repo_path]) + save(repo_path, message=f"Move module from {old_path} to {new_path}", files=[Path('.gitmodules'), repo_path]) return diff --git a/corrlib/initialization.py b/corrlib/initialization.py index bb71db6..c06a201 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -2,9 +2,10 @@ from configparser import ConfigParser import sqlite3 import os from .tracker import save, init +from pathlib import Path -def _create_db(db: str) -> None: +def _create_db(db: Path) -> None: """ Create the database file and the table. @@ -40,7 +41,7 @@ def _create_db(db: str) -> None: return -def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: +def _create_config(path: Path, tracker: str, cached: bool) -> ConfigParser: """ Create the config file construction for backlogger. 
@@ -75,7 +76,7 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser: return config -def _write_config(path: str, config: ConfigParser) -> None: +def _write_config(path: Path, config: ConfigParser) -> None: """ Write the config file to disk. @@ -91,7 +92,7 @@ def _write_config(path: str, config: ConfigParser) -> None: return -def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None: +def create(path: Path, tracker: str = 'datalad', cached: bool = True) -> None: """ Create folder of backlogs. @@ -107,13 +108,13 @@ def create(path: str, tracker: str = 'datalad', cached: bool = True) -> None: config = _create_config(path, tracker, cached) init(path, tracker) _write_config(path, config) - _create_db(os.path.join(path, config['paths']['db'])) - os.chmod(os.path.join(path, config['paths']['db']), 0o666) - os.makedirs(os.path.join(path, config['paths']['projects_path'])) - os.makedirs(os.path.join(path, config['paths']['archive_path'])) - os.makedirs(os.path.join(path, config['paths']['toml_imports_path'])) - os.makedirs(os.path.join(path, config['paths']['import_scripts_path'], 'template.py')) - with open(os.path.join(path, ".gitignore"), "w") as fp: + _create_db(path / config['paths']['db']) + os.chmod(path / config['paths']['db'], 0o666) + os.makedirs(path / config['paths']['projects_path']) + os.makedirs(path / config['paths']['archive_path']) + os.makedirs(path / config['paths']['toml_imports_path']) + os.makedirs(path / config['paths']['import_scripts_path'] / 'template.py') + with open(path / ".gitignore", "w") as fp: fp.write(".cache") fp.close() save(path, message="Initialized correlator library") diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 71ebec6..a3bce6f 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -3,9 +3,10 @@ import datalad.api as dl import os import fnmatch from typing import Any, Optional +from pathlib import Path -def read_ms1_param(path: str, project: str, 
file_in_project: str) -> dict[str, Any]: +def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms1 measurements from a parameter file in the project. @@ -69,7 +70,7 @@ def read_ms1_param(path: str, project: str, file_in_project: str) -> dict[str, A return param -def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: +def read_ms3_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms3 measurements from a parameter file in the project. @@ -103,7 +104,7 @@ def read_ms3_param(path: str, project: str, file_in_project: str) -> dict[str, A return param -def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def read_rwms(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, postfix: str="ms1", version: str='2.0', names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Read reweighting factor measurements from the project. @@ -160,7 +161,7 @@ def read_rwms(path: str, project: str, dir_in_project: str, param: dict[str, Any return rw_dict -def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def extract_t0(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str="", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Extract t0 measurements from the project. 
@@ -234,7 +235,7 @@ def extract_t0(path: str, project: str, dir_in_project: str, param: dict[str, An return t0_dict -def extract_t1(path: str, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: +def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, Any], prefix: str, dtr_read: int, xmin: int, spatial_extent: int, fit_range: int = 5, postfix: str = "", names: Optional[list[str]]=None, files: Optional[list[str]]=None) -> dict[str, Any]: """ Extract t1 measurements from the project. diff --git a/corrlib/input/sfcf.py b/corrlib/input/sfcf.py index 8b6e1a3..acd8261 100644 --- a/corrlib/input/sfcf.py +++ b/corrlib/input/sfcf.py @@ -4,6 +4,7 @@ import json import os from typing import Any from fnmatch import fnmatch +from pathlib import Path bi_corrs: list[str] = ["f_P", "fP", "f_p", @@ -80,7 +81,7 @@ for c in bib_corrs: corr_types[c] = 'bib' -def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: +def read_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters from the sfcf file. 
@@ -96,7 +97,7 @@ def read_param(path: str, project: str, file_in_project: str) -> dict[str, Any]: """ - file = path + "/projects/" + project + '/' + file_in_project + file = path / "projects" / project / file_in_project dl.get(file, dataset=path) with open(file, 'r') as f: lines = f.readlines() @@ -257,7 +258,7 @@ def get_specs(key: str, parameters: dict[str, Any], sep: str = '/') -> str: return s -def read_data(path: str, project: str, dir_in_project: str, prefix: str, param: dict[str, Any], version: str = '1.0c', cfg_seperator: str = 'n', sep: str = '/', **kwargs: Any) -> dict[str, Any]: +def read_data(path: Path, project: str, dir_in_project: str, prefix: str, param: dict[str, Any], version: str = '1.0c', cfg_seperator: str = 'n', sep: str = '/', **kwargs: Any) -> dict[str, Any]: """ Extract the data from the sfcf file. diff --git a/corrlib/main.py b/corrlib/main.py index 88b99b3..831b69d 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -8,9 +8,10 @@ from .find import _project_lookup_by_id from .tools import list2str, str2list, get_db_file from .tracker import get, save, unlock, clone, drop from typing import Union, Optional +from pathlib import Path -def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Union[list[str], None]=None, aliases: Union[list[str], None]=None, code: Union[str, None]=None) -> None: +def create_project(path: Path, uuid: str, owner: Union[str, None]=None, tags: Union[list[str], None]=None, aliases: Union[list[str], None]=None, code: Union[str, None]=None) -> None: """ Create a new project entry in the database. @@ -48,7 +49,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni return -def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] = None) -> None: +def update_project_data(path: Path, uuid: str, prop: str, value: Union[str, None] = None) -> None: """ Update/Edit a project entry in the database. Thin wrapper around sql3 call. 
@@ -74,9 +75,9 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None] return -def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: +def update_aliases(path: Path, uuid: str, aliases: list[str]) -> None: db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file get(path, db_file) known_data = _project_lookup_by_id(db, uuid)[0] known_aliases = known_data[1] @@ -102,7 +103,7 @@ def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: return -def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: +def import_project(path: Path, url: str, owner: Union[str, None]=None, tags: Optional[list[str]]=None, aliases: Optional[list[str]]=None, code: Optional[str]=None, isDataset: bool=True) -> str: """ Import a datalad dataset into the backlogger. @@ -134,14 +135,14 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti uuid = str(conf.get("datalad.dataset.id")) if not uuid: raise ValueError("The dataset does not have a uuid!") - if not os.path.exists(path + "/projects/" + uuid): + if not os.path.exists(path / "projects" / uuid): db_file = get_db_file(path) get(path, db_file) unlock(path, db_file) create_project(path, uuid, owner, tags, aliases, code) - move_submodule(path, 'projects/tmp', 'projects/' + uuid) - os.mkdir(path + '/import_scripts/' + uuid) - save(path, message="Import project from " + url, files=['projects/' + uuid, db_file]) + move_submodule(path, Path('projects/tmp'), Path('projects') / uuid) + os.mkdir(path / 'import_scripts' / uuid) + save(path, message="Import project from " + url, files=[Path(f'projects/{uuid}'), db_file]) else: dl.drop(tmp_path, reckless='kill') shutil.rmtree(tmp_path) @@ -156,7 +157,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti return uuid -def 
drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: +def drop_project_data(path: Path, uuid: str, path_in_project: str = "") -> None: """ Drop (parts of) a project to free up diskspace @@ -169,6 +170,5 @@ def drop_project_data(path: str, uuid: str, path_in_project: str = "") -> None: path_pn_project: str, optional If set, only the given path within the project is dropped. """ - drop(path + "/projects/" + uuid + "/" + path_in_project) + drop(path / "projects" / uuid / path_in_project) return - diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 8e5855d..be80b6f 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -10,9 +10,13 @@ from .tools import get_db_file, cache_enabled from .tracker import get, save, unlock import shutil from typing import Any +from pathlib import Path -def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: +CACHE_DIR = ".cache" + + +def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: Union[str, None]) -> None: """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. @@ -33,7 +37,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, The parameter file used for the measurement. 
""" db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file files_to_save = [] @@ -44,11 +48,11 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, conn = sqlite3.connect(db) c = conn.cursor() for corr in measurement.keys(): - file_in_archive = os.path.join('.', 'archive', ensemble, corr, uuid + '.json.gz') - file = os.path.join(path, file_in_archive) + file_in_archive = Path('.') / 'archive' / ensemble / corr / str(uuid + '.json.gz') + file = path / file_in_archive known_meas = {} - if not os.path.exists(os.path.join(path, '.', 'archive', ensemble, corr)): - os.makedirs(os.path.join(path, '.', 'archive', ensemble, corr)) + if not os.path.exists(path / 'archive' / ensemble / corr): + os.makedirs(path / 'archive' / ensemble / corr) files_to_save.append(file_in_archive) else: if os.path.exists(file): @@ -99,7 +103,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, pars[subkey] = json.dumps(parameters) for subkey in subkeys: parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + parHash + meas_path = str(file_in_archive) + "::" + parHash known_meas[parHash] = measurement[corr][subkey] @@ -115,7 +119,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, return -def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: +def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]: """ Load a list of records by their paths. @@ -134,7 +138,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]: return load_records(path, [meas_path])[0] -def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: +def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. 
@@ -162,11 +166,11 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { returned_data: list[Any] = [] for file in needed_data.keys(): for key in list(needed_data[file]): - if os.path.exists(cache_path(path, file, key) + ".p"): - returned_data.append(load_object(cache_path(path, file, key) + ".p")) + if os.path.exists(str(cache_path(path, file, key)) + ".p"): + returned_data.append(load_object(str(cache_path(path, file, key)) + ".p")) else: if file not in preloaded: - preloaded[file] = preload(path, file) + preloaded[file] = preload(path, Path(file)) returned_data.append(preloaded[file][key]) if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): @@ -175,7 +179,7 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = { return returned_data -def cache_dir(path: str, file: str) -> str: +def cache_dir(path: Path, file: str) -> Path: """ Returns the directory corresponding to the cache for the given file. @@ -190,14 +194,14 @@ def cache_dir(path: str, file: str) -> str: cache_path: str The path holding the cached data for the given file. """ - cache_path_list = [path] - cache_path_list.append(".cache") - cache_path_list.extend(file.split("/")[1:]) - cache_path = "/".join(cache_path_list) + cache_path_list = file.split("/")[1:] + cache_path = path / CACHE_DIR + for directory in cache_path_list: + cache_path /= directory return cache_path -def cache_path(path: str, file: str, key: str) -> str: +def cache_path(path: Path, file: str, key: str) -> Path: """ Parameters ---------- @@ -213,11 +217,11 @@ def cache_path(path: str, file: str, key: str) -> str: cache_path: str The path at which the measurement of the given file and key is cached. 
""" - cache_path = os.path.join(cache_dir(path, file), key) + cache_path = cache_dir(path, file) / key return cache_path -def preload(path: str, file: str) -> dict[str, Any]: +def preload(path: Path, file: Path) -> dict[str, Any]: """ Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method. @@ -234,12 +238,12 @@ def preload(path: str, file: str) -> dict[str, Any]: The data read from the file. """ get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(os.path.join(path, file)) + filedict: dict[str, Any] = pj.load_json_dict(path / file) print("> read file") return filedict -def drop_record(path: str, meas_path: str) -> None: +def drop_record(path: Path, meas_path: str) -> None: """ Drop a record by it's path. @@ -251,9 +255,9 @@ def drop_record(path: str, meas_path: str) -> None: The measurement path as noted in the database. """ file_in_archive = meas_path.split("::")[0] - file = os.path.join(path, file_in_archive) + file = path / file_in_archive db_file = get_db_file(path) - db = os.path.join(path, db_file) + db = path / db_file get(path, db_file) sub_key = meas_path.split("::")[1] unlock(path, db_file) @@ -268,7 +272,7 @@ def drop_record(path: str, meas_path: str) -> None: known_meas = pj.load_json_dict(file) if sub_key in known_meas: del known_meas[sub_key] - unlock(path, file_in_archive) + unlock(path, Path(file_in_archive)) pj.dump_dict_to_json(known_meas, file) save(path, message="Drop measurements to database", files=[db, file]) return @@ -276,7 +280,7 @@ def drop_record(path: str, meas_path: str) -> None: raise ValueError("This measurement does not exist as a file!") -def drop_cache(path: str) -> None: +def drop_cache(path: Path) -> None: """ Drop the cache directory of the library. @@ -285,7 +289,7 @@ def drop_cache(path: str) -> None: path: str The path of the library. 
""" - cache_dir = os.path.join(path, ".cache") + cache_dir = path / ".cache" for f in os.listdir(cache_dir): - shutil.rmtree(os.path.join(cache_dir, f)) + shutil.rmtree(cache_dir / f) return diff --git a/corrlib/toml.py b/corrlib/toml.py index feafaf6..add3739 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -19,6 +19,7 @@ from .meas_io import write_measurement import os from .input.implementations import codes as known_codes from typing import Any +from pathlib import Path def replace_string(string: str, name: str, val: str) -> str: @@ -126,7 +127,7 @@ def check_measurement_data(measurements: dict[str, dict[str, str]], code: str) - return -def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None: +def import_tomls(path: Path, files: list[str], copy_files: bool=True) -> None: """ Import multiple toml files. @@ -144,7 +145,7 @@ def import_tomls(path: str, files: list[str], copy_files: bool=True) -> None: return -def import_toml(path: str, file: str, copy_file: bool=True) -> None: +def import_toml(path: Path, file: str, copy_file: bool=True) -> None: """ Import a project decribed by a .toml file. 
@@ -171,7 +172,7 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: aliases = project.get('aliases', []) uuid = project.get('uuid', None) if uuid is not None: - if not os.path.exists(path + "/projects/" + uuid): + if not os.path.exists(path / "projects" / uuid): uuid = import_project(path, project['url'], aliases=aliases) else: update_aliases(path, uuid, aliases) @@ -213,18 +214,18 @@ def import_toml(path: str, file: str, copy_file: bool=True) -> None: write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) print(mname + " imported.") - if not os.path.exists(os.path.join(path, "toml_imports", uuid)): - os.makedirs(os.path.join(path, "toml_imports", uuid)) + if not os.path.exists(path / "toml_imports" / uuid): + os.makedirs(path / "toml_imports" / uuid) if copy_file: - import_file = os.path.join(path, "toml_imports", uuid, file.split("/")[-1]) + import_file = path / "toml_imports" / uuid / file.split("/")[-1] shutil.copy(file, import_file) - save(path, files=[import_file], message="Import using " + import_file) - print("File copied to " + import_file) + save(path, files=[import_file], message=f"Import using {import_file}") + print(f"File copied to {import_file}") print("Imported project.") return -def reimport_project(path: str, uuid: str) -> None: +def reimport_project(path: Path, uuid: str) -> None: """ Reimport an existing project using the files that are already available for this project. @@ -235,14 +236,14 @@ def reimport_project(path: str, uuid: str) -> None: uuid: str uuid of the project that is to be reimported. 
""" - config_path = "/".join([path, "import_scripts", uuid]) + config_path = path / "import_scripts" / uuid for p, filenames, dirnames in os.walk(config_path): for fname in filenames: import_toml(path, os.path.join(config_path, fname), copy_file=False) return -def update_project(path: str, uuid: str) -> None: +def update_project(path: Path, uuid: str) -> None: """ Update all entries associated with a given project. diff --git a/corrlib/tools.py b/corrlib/tools.py index 727ed30..93f0678 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -101,7 +101,7 @@ def set_config(path: Path, section: str, option: str, value: Any) -> None: return -def get_db_file(path: Path) -> str: +def get_db_file(path: Path) -> Path: """ Get the database file associated with the library at the given path. @@ -121,7 +121,7 @@ def get_db_file(path: Path) -> str: config.read(config_path) else: raise FileNotFoundError("Configuration file not found.") - db_file = config.get('paths', 'db', fallback='backlogger.db') + db_file = Path(config.get('paths', 'db', fallback='backlogger.db')) return db_file diff --git a/corrlib/tracker.py b/corrlib/tracker.py index e535b03..a6e9bf4 100644 --- a/corrlib/tracker.py +++ b/corrlib/tracker.py @@ -4,9 +4,10 @@ import datalad.api as dl from typing import Optional import shutil from .tools import get_db_file +from pathlib import Path -def get_tracker(path: str) -> str: +def get_tracker(path: Path) -> str: """ Get the tracker used in the dataset located at path. @@ -30,7 +31,7 @@ def get_tracker(path: str) -> str: return tracker -def get(path: str, file: str) -> None: +def get(path: Path, file: Path) -> None: """ Wrapper function to get a file from the dataset located at path with the specified tracker. 
@@ -56,7 +57,7 @@ def get(path: str, file: str) -> None: return -def save(path: str, message: str, files: Optional[list[str]]=None) -> None: +def save(path: Path, message: str, files: Optional[list[Path]]=None) -> None: """ Wrapper function to save a file to the dataset located at path with the specified tracker. @@ -72,7 +73,7 @@ def save(path: str, message: str, files: Optional[list[str]]=None) -> None: tracker = get_tracker(path) if tracker == 'datalad': if files is not None: - files = [os.path.join(path, f) for f in files] + files = [path / f for f in files] dl.save(files, message=message, dataset=path) elif tracker == 'None': Warning("Tracker 'None' does not implement save.") @@ -81,7 +82,7 @@ def save(path: str, message: str, files: Optional[list[str]]=None) -> None: raise ValueError(f"Tracker {tracker} is not supported.") -def init(path: str, tracker: str='datalad') -> None: +def init(path: Path, tracker: str='datalad') -> None: """ Initialize a dataset at the specified path with the specified tracker. @@ -101,7 +102,7 @@ def init(path: str, tracker: str='datalad') -> None: return -def unlock(path: str, file: str) -> None: +def unlock(path: Path, file: Path) -> None: """ Wrapper function to unlock a file in the dataset located at path with the specified tracker. @@ -123,7 +124,7 @@ def unlock(path: str, file: str) -> None: return -def clone(path: str, source: str, target: str) -> None: +def clone(path: Path, source: str, target: str) -> None: """ Wrapper function to clone a dataset from source to target with the specified tracker. Parameters @@ -147,7 +148,7 @@ def clone(path: str, source: str, target: str) -> None: return -def drop(path: str, reckless: Optional[str]=None) -> None: +def drop(path: Path, reckless: Optional[str]=None) -> None: """ Wrapper function to drop data from a dataset located at path with the specified tracker. 
diff --git a/tests/test_initialization.py b/tests/test_initialization.py index 9284c82..d78fb15 100644 --- a/tests/test_initialization.py +++ b/tests/test_initialization.py @@ -5,21 +5,21 @@ from pathlib import Path def test_init_folders(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path)) + init.create(dataset_path) assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) def test_init_folders_no_tracker(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path), tracker="None") + init.create(dataset_path, tracker="None") assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path / "backlogger.db")) def test_init_config(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path), tracker="None") + init.create(dataset_path, tracker="None") config_path = dataset_path / ".corrlib" assert os.path.exists(str(config_path)) from configparser import ConfigParser @@ -37,7 +37,7 @@ def test_init_config(tmp_path: Path) -> None: def test_init_db(tmp_path: Path) -> None: dataset_path = tmp_path / "test_dataset" - init.create(str(dataset_path)) + init.create(dataset_path) assert os.path.exists(str(dataset_path / "backlogger.db")) conn = sql.connect(str(dataset_path / "backlogger.db")) cursor = conn.cursor() diff --git a/tests/tools_test.py b/tests/tools_test.py index 9be88b4..a5427f3 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -39,7 +39,7 @@ def test_set_config(tmp_path: Path) -> None: value = "test_value" # config is not yet available tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option', fallback="not the value") == "test_value" @@ -48,7 +48,7 @@ def test_set_config(tmp_path: Path) -> None: option = 
"test_option2" value = "test_value2" tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option2', fallback="not the value") == "test_value2" @@ -57,7 +57,7 @@ def test_set_config(tmp_path: Path) -> None: option = "test_option2" value = "test_value3" tl.set_config(tmp_path, section, option, value) - config_path = os.path.join(tmp_path, '.corrlib') + config_path = tmp_path / '.corrlib' config = ConfigParser() config.read(config_path) assert config.get('core', 'test_option2', fallback="not the value") == "test_value3" @@ -69,7 +69,7 @@ def test_get_db_file(tmp_path: Path) -> None: value = "test_value" # config is not yet available tl.set_config(tmp_path, section, option, value) - assert tl.get_db_file(tmp_path) == "test_value" + assert tl.get_db_file(tmp_path) == Path("test_value") def test_cache_enabled(tmp_path: Path) -> None: From 480c04e0692c045d5129b8822530d11443fe5b9a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 16:18:32 +0100 Subject: [PATCH 37/92] lint --- tests/sfcf_in_test.py | 2 +- tests/tools_test.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/sfcf_in_test.py b/tests/sfcf_in_test.py index 5e4ff83..7ebc94a 100644 --- a/tests/sfcf_in_test.py +++ b/tests/sfcf_in_test.py @@ -26,4 +26,4 @@ def test_get_specs() -> None: key = "f_P/q1 q2/1/0/0" specs = json.loads(input.get_specs(key, parameters)) assert specs['quarks'] == ['a', 'b'] - assert specs['wf1'][0] == [1, [0, 0]] \ No newline at end of file + assert specs['wf1'][0] == [1, [0, 0]] diff --git a/tests/tools_test.py b/tests/tools_test.py index a5427f3..541674f 100644 --- a/tests/tools_test.py +++ b/tests/tools_test.py @@ -1,6 +1,5 @@ from corrlib import tools as tl from configparser import ConfigParser -import os from pathlib import Path import pytest @@ -81,5 +80,5 @@ def 
test_cache_enabled(tmp_path: Path) -> None: tl.set_config(tmp_path, section, option, "False") assert not tl.cache_enabled(tmp_path) tl.set_config(tmp_path, section, option, "lalala") - with pytest.raises(ValueError) as e_info: + with pytest.raises(ValueError): tl.cache_enabled(tmp_path) From 2396a657b29482060917ecb2a99a13ee70f091ae Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 17:50:38 +0100 Subject: [PATCH 38/92] rename init_tests --- tests/{test_initialization.py => initialization_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_initialization.py => initialization_test.py} (100%) diff --git a/tests/test_initialization.py b/tests/initialization_test.py similarity index 100% rename from tests/test_initialization.py rename to tests/initialization_test.py From a57138dc50f1c1caca90794a14d53b11efa5165b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 18:26:17 +0100 Subject: [PATCH 39/92] add test for project alias lookup --- tests/find_test.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 tests/find_test.py diff --git a/tests/find_test.py b/tests/find_test.py new file mode 100644 index 0000000..b63b246 --- /dev/null +++ b/tests/find_test.py @@ -0,0 +1,33 @@ +import corrlib.find as find +import sqlite3 +from pathlib import Path +import corrlib.initialization as cinit +import pytest + + +def make_sql(path: Path) -> Path: + db = path / "test.db" + cinit._create_db(db) + return db + +def test_find_lookup_by_one_alias(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + assert uuid == 
find._project_lookup_by_alias(db, "fun_project") + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(db, "fun_project") + conn.close() From 8a8480af32a16c9bcdafea031f4951eeee3f8250 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 18:26:40 +0100 Subject: [PATCH 40/92] fix alias db --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index faef5db..4c51e05 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -28,7 +28,7 @@ def _project_lookup_by_alias(db: Path, alias: str) -> str: """ conn = sqlite3.connect(db) c = conn.cursor() - c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'") + c.execute(f"SELECT * FROM 'projects' WHERE aliases = '{alias}'") results = c.fetchall() conn.close() if len(results)>1: From bd34b7c3785ddb509f2fbfffbf01ce145ee0463d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 22:33:01 +0100 Subject: [PATCH 41/92] write first trivial find test --- tests/find_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/find_test.py b/tests/find_test.py index b63b246..8cc7923 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -10,6 +10,7 @@ def make_sql(path: Path) -> Path: cinit._create_db(db) return db + def test_find_lookup_by_one_alias(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) From f8566207e36978af1cefded2ae6c8fc521e732cd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 22:43:39 +0100 Subject: [PATCH 42/92] add id lookup test --- tests/find_test.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/find_test.py b/tests/find_test.py index 
8cc7923..e0730e9 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -32,3 +32,25 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: with pytest.raises(Exception): assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() + + +def test_find_lookup_by_id(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + conn.close() + result = find._project_lookup_by_id(db, uuid)[0] + assert uuid == result[0] + assert alias_str == result[1] + assert tag_str == result[2] + assert owner == result[3] + assert code == result[4] + From d0d5f9aa8775c4ce78f547d7cbe3dea5199743cb Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:37:22 +0100 Subject: [PATCH 43/92] rewrite time filter --- corrlib/find.py | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 4c51e05..e4ee735 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -8,6 +8,7 @@ from .tools import k2m, get_db_file from .tracker import get from typing import Any, Optional from pathlib import Path +import datetime as dt def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -62,8 +63,37 @@ def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: return results -def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, - created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: +def _time_filter(results: 
pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: + drops = [] + for ind in len(results): + result = results.iloc[ind] + created_at = dt.datetime.fromisoformat(result['created_at']) + updated_at = dt.datetime.fromisoformat(result['updated_at']) + + if created_before is not None: + created_before = dt.datetime.fromisoformat(created_before) + if created_before < created_at: + drops.append(ind) + continue + if created_after is not None: + created_after = dt.datetime.fromisoformat(created_after) + if created_before > created_at: + drops.append(ind) + continue + if updated_before is not None: + updated_before = dt.datetime.fromisoformat(updated_before) + if updated_before < updated_at: + drops.append(ind) + continue + if updated_after is not None: + updated_after = dt.datetime.fromisoformat(updated_after) + if updated_after > updated_at: + drops.append(ind) + continue + return results.drop(drops) + + +def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None) -> pd.DataFrame: """ Look up a correlator record in the database by the data given to the method. 
@@ -105,14 +135,6 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project search_expr += f" AND code = '{code}'" if parameters: search_expr += f" AND parameters = '{parameters}'" - if created_before: - search_expr += f" AND created_at < '{created_before}'" - if created_after: - search_expr += f" AND created_at > '{created_after}'" - if updated_before: - search_expr += f" AND updated_at < '{updated_before}'" - if updated_after: - search_expr += f" AND updated_at > '{updated_after}'" conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) conn.close() @@ -236,7 +258,9 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) - results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after) + results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) + if Any([created_before, created_after, updated_before, updated_after]): + results = _time_filter(results, created_before, created_after, updated_before, updated_after) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": From 29558a734b1522c94979858fb1ed0a12f8ed20d3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:38:40 +0100 Subject: [PATCH 44/92] add test for db lookup --- tests/find_test.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/find_test.py b/tests/find_test.py index e0730e9..da1bfc1 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -54,3 +54,42 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: assert owner == result[3] assert code == result[4] + +def test_db_lookup(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn 
= sqlite3.connect(db) + c = conn.cursor() + + corr = "f_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 5.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + conn.close() + + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert results.empty + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert results.empty + #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) + + #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) + + From 402ca07edbecda8bb5828596e98527c9ed2de8a4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:42:42 +0100 Subject: [PATCH 45/92] linting and hotfix --- corrlib/find.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index e4ee735..3e62344 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -65,32 +65,32 @@ def 
_project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: drops = [] - for ind in len(results): + for ind in range(len(results)): result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) if created_before is not None: - created_before = dt.datetime.fromisoformat(created_before) - if created_before < created_at: + date_created_before = dt.datetime.fromisoformat(created_before) + if date_created_before < created_at: drops.append(ind) continue if created_after is not None: - created_after = dt.datetime.fromisoformat(created_after) - if created_before > created_at: + date_created_after = dt.datetime.fromisoformat(created_after) + if date_created_after > created_at: drops.append(ind) continue if updated_before is not None: - updated_before = dt.datetime.fromisoformat(updated_before) - if updated_before < updated_at: + date_updated_before = dt.datetime.fromisoformat(updated_before) + if date_updated_before < updated_at: drops.append(ind) continue if updated_after is not None: - updated_after = dt.datetime.fromisoformat(updated_after) - if updated_after > updated_at: + date_updated_after = dt.datetime.fromisoformat(updated_after) + if date_updated_after > updated_at: drops.append(ind) continue - return results.drop(drops) + return results.drop(drops) def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None) -> pd.DataFrame: From b50ffc4c6b898de970e8ded6c2287c96a6c6389b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Mon, 23 Mar 2026 23:45:22 +0100 Subject: [PATCH 46/92] any hotfix --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py 
b/corrlib/find.py index 3e62344..14b1772 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -259,7 +259,7 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) - if Any([created_before, created_after, updated_before, updated_after]): + if any(arg is not None for arg in [created_before, created_after, updated_before, updated_after]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) if code == "sfcf": results = sfcf_filter(results, **kwargs) From c431145a23764015d52ed6a1fd3da007d554cc3f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:23:30 +0100 Subject: [PATCH 47/92] some more db lookup --- tests/find_test.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index da1bfc1..e895b85 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -70,7 +70,6 @@ def test_db_lookup(tmp_path: Path) -> None: c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) conn.commit() - conn.close() results = find._db_lookup(db, ensemble, corr, code) assert len(results) == 1 @@ -88,8 +87,38 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") assert results.empty - #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, created_after, updated_before, updated_after) - #results = find._db_lookup(db, ensemble, corr, code, project, parameters, created_before, 
created_after, updated_before, updated_after) + corr = "g_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 4.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + corr = "f_A" + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert len(results) == 1 + conn.close() From 3fd557f3eebd2a57b9340b727a23f72586f6e68e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:24:12 +0100 Subject: [PATCH 48/92] add customtFilter --- corrlib/find.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 14b1772..8934854 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,9 +6,10 @@ import numpy as np from .input.implementations import codes from .tools 
import k2m, get_db_file from .tracker import get -from typing import Any, Optional +from typing import Any, Optional, Union from pathlib import Path import datetime as dt +from collections.abc import Callable def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -251,22 +252,31 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results.drop(drops) +def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: + return results + + def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, - created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: + created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, + revision: Optional[str]=None, + customFilter: Optional[Callable[[pd.DataFrame], pd.DataFrame]] = None, + **kwargs: Any) -> pd.DataFrame: db_file = get_db_file(path) db = path / db_file if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) get(path, db_file) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) - if any(arg is not None for arg in [created_before, created_after, updated_before, updated_after]): + if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) + if customFilter is not None: + results = customFilter(results) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": - pass + results = openQCD_filter(results, **kwargs) else: - raise Exception + raise ValueError(f"Code {code} is not known.") print("Found " + str(len(results)) + " result" 
+ ("s" if len(results)>1 else "")) return results.reset_index() From 3fe8e28a68a58a4cf8bce7a29d43f60286000c81 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 09:25:21 +0100 Subject: [PATCH 49/92] customFilter after general filters --- corrlib/find.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 8934854..e099aea 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -269,14 +269,14 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) - if customFilter is not None: - results = customFilter(results) if code == "sfcf": results = sfcf_filter(results, **kwargs) elif code == "openQCD": results = openQCD_filter(results, **kwargs) else: raise ValueError(f"Code {code} is not known.") + if customFilter is not None: + results = customFilter(results) print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() From 4516ca3149cac8b2f0420903c41576b471b7ed8f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:39:00 +0100 Subject: [PATCH 50/92] better type annotation for id lookup --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index e099aea..dd3a9a6 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -40,7 +40,7 @@ def _project_lookup_by_alias(db: Path, alias: str) -> str: return str(results[0][0]) -def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, str]]: +def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: """ Return the project information available in the database by UUID. 
From 2fd46d452b84cf326d19d0a814a465631b5c8241 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:40:46 +0100 Subject: [PATCH 51/92] hotfix ensure that path is a Path --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index be80b6f..de19727 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path: The path holding the cached data for the given file. """ cache_path_list = file.split("/")[1:] - cache_path = path / CACHE_DIR + cache_path = Path(path) / CACHE_DIR for directory in cache_path_list: cache_path /= directory return cache_path From b8121811f967530f174202d5c67e72a0132295de Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:49:04 +0100 Subject: [PATCH 52/92] HOTFIX: hand over path as str --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index de19727..a87e227 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]: The data read from the file. 
""" get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(path / file) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) print("> read file") return filedict From 38b4983fed0af22231b27b57dc32a4efb121a63a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:50:30 +0100 Subject: [PATCH 53/92] HOTFIX: hand over path as str 2 --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a87e227..48017a1 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], cache_path(path, file, key)) + dump_object(preloaded[file][key], str(cache_path(path, file, key))) return returned_data From cc14e68b4429a122ee0c9b299555f4e7ca8fef45 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 26 Mar 2026 17:19:58 +0100 Subject: [PATCH 54/92] add tests for time filter and find project, add a first check for integrity of the database --- corrlib/find.py | 4 ++ corrlib/integrity.py | 5 ++ tests/find_test.py | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 corrlib/integrity.py diff --git a/corrlib/find.py b/corrlib/find.py index dd3a9a6..3cbe09b 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,6 +6,7 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get +from .integrity import check_time_validity from typing import Any, Optional, Union from pathlib import Path import datetime as dt @@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = 
dt.datetime.fromisoformat(result['updated_at']) + db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) + if not db_times_valid: + raise ValueError('Time stamps not valid for result with path', result["path"]) if created_before is not None: date_created_before = dt.datetime.fromisoformat(created_before) diff --git a/corrlib/integrity.py b/corrlib/integrity.py new file mode 100644 index 0000000..bf890db --- /dev/null +++ b/corrlib/integrity.py @@ -0,0 +1,5 @@ +import datetime as dt + + +def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: + return not (created_at > updated_at) diff --git a/tests/find_test.py b/tests/find_test.py index e895b85..573f87e 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -3,6 +3,8 @@ import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest +import pandas as pd +import datalad.api as dl def make_sql(path: Path) -> Path: @@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: conn.close() +def test_find_project(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + assert uuid == find.find_project(tmp_path, "fun_project") + + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(tmp_path, 
"fun_project") + conn.close() + + def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 conn.close() + + +def test_time_filter() -> None: + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later + record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... 
+ + data = [record_A, record_B, record_C, record_D, record_E] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') + 
assert results.empty + results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + data = [record_A, record_B, record_C, record_D, record_F] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.raises(ValueError): + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') From 81af9579dcad49d0b5c3095b0d467cf49d2282e6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 26 Mar 2026 17:25:57 +0100 Subject: [PATCH 55/92] add a docstring for time filter --- corrlib/find.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/corrlib/find.py b/corrlib/find.py index 3cbe09b..cb85130 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -66,6 +66,22 @@ def _project_lookup_by_id(db: Path, uuid: str) -> list[tuple[str, ...]]: def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame: + """ + Filter the results from the database in terms of the creation and update times. + + Parameters + ---------- + results: pd.DataFrame + The dataframe holding the unfilteres results from the database. + created_before: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly before the date and time given. + created_after: str + Contraint on the creation date in datetime.datetime.isoformat. Note that this is exclusive. The creation date has to be truly after the date and time given. 
+ updated_before: str + Constraint on the update date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly before the date and time given. + updated_after: str + Constraint on the update date in datetime.datetime.isoformat. Note that this is exclusive. The date of the last update has to be truly after the date and time given. + """ drops = [] for ind in range(len(results)): result = results.iloc[ind] From e8360c88b938cbd3636b8b8cfa30ce0b3375e7ed Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:53:07 +0100 Subject: [PATCH 56/92] add more templates --- tests/find_test.py | 278 +++++++++++++++++++++++++++++---------------- 1 file changed, 180 insertions(+), 98 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index 573f87e..944ae5f 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -35,35 +35,6 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() - -def test_find_project(tmp_path: Path) -> None: - cinit.create(tmp_path) - db = tmp_path / "backlogger.db" - dl.unlock(str(db), dataset=str(tmp_path)) - conn = sqlite3.connect(db) - c = conn.cursor() - uuid = "test_uuid" - alias_str = "fun_project" - tag_str = "tt" - owner = "tester" - code = "test_code" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - assert uuid == find.find_project(tmp_path, "fun_project") - - uuid = "test_uuid2" - alias_str = "fun_project" - c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (uuid, alias_str, tag_str, owner, code)) - conn.commit() - - with pytest.raises(Exception): - assert uuid == find._project_lookup_by_alias(tmp_path, 
"fun_project") - conn.close() - - def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -85,75 +56,6 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: assert code == result[4] -def test_db_lookup(tmp_path: Path) -> None: - db = make_sql(tmp_path) - conn = sqlite3.connect(db) - c = conn.cursor() - - corr = "f_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 5.0}" - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert results.empty - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") - assert results.empty - - corr = "g_A" - ensemble = "SF_A" - code = "openQCD" - meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" - uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 4.0}" - parameter_file = "projects/Project_A/myinput.in" - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), 
datetime('now'))", - (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) - conn.commit() - - corr = "f_A" - results = find._db_lookup(db, ensemble, corr, code) - assert len(results) == 1 - results = find._db_lookup(db, "SF_B", corr, code) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code) - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, "sfcf") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") - assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") - assert results.empty - results = find._db_lookup(db, ensemble, corr, code, parameters = pars) - assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") - assert len(results) == 1 - - conn.close() - - def test_time_filter() -> None: record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created @@ -238,3 +140,183 @@ def test_time_filter() -> None: with pytest.raises(ValueError): results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + + +def test_db_lookup(tmp_path: Path) -> None: + db = make_sql(tmp_path) + conn = sqlite3.connect(db) + c = conn.cursor() + + corr = "f_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 5.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, 
?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert results.empty + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert results.empty + + corr = "g_A" + ensemble = "SF_A" + code = "openQCD" + meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" + uuid = "Project_A" + pars = "{par_A: 3.0, par_B: 4.0}" + parameter_file = "projects/Project_A/myinput.in" + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) + conn.commit() + + corr = "f_A" + results = find._db_lookup(db, ensemble, corr, code) + assert len(results) == 1 + results = find._db_lookup(db, "SF_B", corr, code) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code) + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, "sfcf") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") + assert len(results) == 1 + results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") + assert 
results.empty + results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") + assert results.empty + results = find._db_lookup(db, ensemble, corr, code, parameters = pars) + assert results.empty + results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + assert len(results) == 1 + + conn.close() + + +def test_sfcf_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_4 = [] + record_5 = [] + record_6 = [] + record_7 = [] + record_8 = [] + record_9 = [] + data = [ + record_0, + record_1, + record_2, + record_3, + record_4, + record_5, + record_6, + record_7, + record_8, + record_9, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + assert True + + +def test_openQCD_filter() -> None: + assert True + + +def test_find_record() -> None: + assert True + + +def test_find_project(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + 
alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + assert uuid == find.find_project(tmp_path, "fun_project") + + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project") + conn.close() + + +def test_list_projects(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid2" + alias_str = "fun_project2" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid3" + alias_str = "fun_project3" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + uuid = "test_uuid4" + alias_str = "fun_project4" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, 
owner, code)) + conn.commit() + conn.close() + results = find.list_projects(tmp_path) + assert len(results) == 4 + for i in range(4): + assert len(results[i]) == 2 From 1a1ac5121dbd623513bfaca70de0aa829352029c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:53:39 +0100 Subject: [PATCH 57/92] restructure: make code filter --- corrlib/find.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index cb85130..9b2c201 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -276,6 +276,15 @@ def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: return results +def _code_filter(results: pd.DataFrame, code: str, **kwargs: Any) -> pd.DataFrame: + if code == "sfcf": + return sfcf_filter(results, **kwargs) + elif code == "openQCD": + return openQCD_filter(results, **kwargs) + else: + raise ValueError(f"Code {code} is not known.") + + def find_record(path: Path, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, @@ -289,12 +298,7 @@ def find_record(path: Path, ensemble: str, correlator_name: str, code: str, proj results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters) if any([arg is not None for arg in [created_before, created_after, updated_before, updated_after]]): results = _time_filter(results, created_before, created_after, updated_before, updated_after) - if code == "sfcf": - results = sfcf_filter(results, **kwargs) - elif code == "openQCD": - results = openQCD_filter(results, **kwargs) - else: - raise ValueError(f"Code {code} is not known.") + results = _code_filter(results, code, **kwargs) if customFilter is not None: results = customFilter(results) print("Found " + str(len(results)) + " result" + ("s" if 
len(results)>1 else "")) From f98521b5a1f6839d1624579186677ee8ce5cbf5d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 27 Mar 2026 11:56:40 +0100 Subject: [PATCH 58/92] HOTFIX: strings for pyerrors 3 --- corrlib/meas_io.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 48017a1..0f9ac02 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -59,7 +59,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str if file not in files_to_save: unlock(path, file_in_archive) files_to_save.append(file_in_archive) - known_meas = pj.load_json_dict(file, verbose=False) + known_meas = pj.load_json_dict(str(file), verbose=False) if code == "sfcf": if parameter_file is not None: parameters = sfcf.read_param(path, uuid, parameter_file) @@ -113,7 +113,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() - pj.dump_dict_to_json(known_meas, file) + pj.dump_dict_to_json(known_meas, str(file)) conn.close() save(path, message="Add measurements to database", files=files_to_save) return @@ -269,11 +269,11 @@ def drop_record(path: Path, meas_path: str) -> None: raise ValueError("This measurement does not exist as an entry!") conn.commit() - known_meas = pj.load_json_dict(file) + known_meas = pj.load_json_dict(str(file)) if sub_key in known_meas: del known_meas[sub_key] unlock(path, Path(file_in_archive)) - pj.dump_dict_to_json(known_meas, file) + pj.dump_dict_to_json(known_meas, str(file)) save(path, message="Drop measurements to database", files=[db, file]) return else: From 4673751dc3fd56dcb9776fb8d79e2d5b60f9e4b2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 7 Apr 
2026 11:29:10 +0200 Subject: [PATCH 59/92] add docstrings for openQCD filter --- corrlib/find.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/corrlib/find.py b/corrlib/find.py index 9b2c201..d368973 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -273,10 +273,43 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: + """ + Filter for parameters of openQCD. + + Parameters + ---------- + results: pd.DataFrame + The unfiltered list of results from the database. + + Returns + ------- + results: pd.DataFrame + The filtered results. + + """ return results def _code_filter(results: pd.DataFrame, code: str, **kwargs: Any) -> pd.DataFrame: + """ + Abstraction of the filters for the different codes that are available. + At the moment, only openQCD and SFCF are known. + The possible key words for the parameters can be seen in the descriptions of the code-specific filters. + + Parameters + ---------- + results: pd.DataFrame + The unfiltered list of results from the database. + code: str + The name of the code that produced the record at hand. + kwargs: + The keyword args that are handed over to the code-specific filters. + + Returns + ------- + results: pd.DataFrame + The filtered results. 
+ """ if code == "sfcf": return sfcf_filter(results, **kwargs) elif code == "openQCD": From 8db8d46a06c76bed244bcd9df374c6060d1886ff Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 7 Apr 2026 11:40:48 +0200 Subject: [PATCH 60/92] add very simple tests or code filter and openQCD filter, fix json par strings --- tests/find_test.py | 127 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 102 insertions(+), 25 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index 944ae5f..156e5fe 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -57,17 +57,17 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created - record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + 
record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... data = [record_A, record_B, record_C, record_D, record_E] @@ -172,7 +172,7 @@ def test_db_lookup(tmp_path: Path) -> None: assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert len(results) == 1 - results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}") + results = find._db_lookup(db, ensemble, corr, code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert results.empty corr = "g_A" @@ -180,7 +180,7 @@ def test_db_lookup(tmp_path: Path) -> None: code = "openQCD" meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" uuid = "Project_A" - pars = "{par_A: 3.0, par_B: 4.0}" + pars = '{"par_A": 3.0, "par_B": 4.0}' parameter_file = "projects/Project_A/myinput.in" c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) @@ 
-205,38 +205,26 @@ def test_db_lookup(tmp_path: Path) -> None: assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert results.empty - results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}") + results = find._db_lookup(db, ensemble, "g_A", code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert len(results) == 1 conn.close() def test_sfcf_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_4 = [] - record_5 = [] - record_6 = [] - record_7 = [] - record_8 = [] - record_9 = 
[] data = [ record_0, record_1, record_2, record_3, - record_4, - record_5, - record_6, - record_7, - record_8, - record_9, ] cols = ["name", "ensemble", @@ -253,9 +241,98 @@ def test_sfcf_filter() -> None: def test_openQCD_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + find.openQCD_filter(df) assert True +def test_code_filter() -> None: + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', 
"projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_4 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_5 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_6 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_7 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_8 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + res = find._code_filter(df, "sfcf") + assert len(res) == 4 + + data = [ + record_4, + record_5, + record_6, + record_7, + record_8, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + res = find._code_filter(df, "openQCD") + 
assert len(res) == 5 + with pytest.raises(ValueError): + res = find._code_filter(df, "asdf") + + def test_find_record() -> None: assert True From 3a1e41808b00763ec270ef32700a9ba45fcf74ee Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 8 Apr 2026 17:26:38 +0200 Subject: [PATCH 61/92] correct minor typos in doc --- corrlib/find.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index d368973..660e4bf 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -179,9 +179,9 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: qk2: float, optional Mass parameter $\kappa_2$ of the first quark. qm1: float, optional - Bare quak mass $m_1$ of the first quark. + Bare quark mass $m_1$ of the first quark. qm2: float, optional - Bare quak mass $m_1$ of the first quark. + Bare quark mass $m_2$ of the first quark. quarks_thetas: list[list[float]], optional wf1: optional wf2: optional From 7275fdd4f33c67d0927ca65990e59865ce77cac2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 8 Apr 2026 17:28:54 +0200 Subject: [PATCH 62/92] remove unnecessary output when results are empty --- corrlib/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/corrlib/cli.py b/corrlib/cli.py index b28692a..6c1c3c5 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -108,6 +108,8 @@ def find( Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator. 
""" results = find_record(path, ensemble, corr, code) + if results.empty: + return if arg == 'all': print(results) else: From 5c37c06b1356ab7c53dfdc1524470d5cd4df330b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 09:54:39 +0200 Subject: [PATCH 63/92] add an implementation to read the first ~200 bytes of the par file of openQCD's qcd2 --- corrlib/input/openQCD.py | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index a3bce6f..9c5fbbc 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -4,6 +4,7 @@ import os import fnmatch from typing import Any, Optional from pathlib import Path +import struct def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: @@ -304,3 +305,58 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A t1_dict[param["type"]] = {} t1_dict[param["type"]][pars] = t0 return t1_dict + + +def read_par_file(fname: str) -> dict[str, dict[str, Any]]: + + def _qcd2_write_lat_parms() -> dict[str, Any]: + lat_pars = {} + + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] + kappas = [] + m0s = [] + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + bc_pars = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + 
bc_pars["cG"] = list(bc_parms[:2]) + bc_pars["cF"] = list(bc_parms[2:4]) + phi = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict From 5ea832675702ae6715b07215de120315657ca03d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 10:26:47 +0200 Subject: [PATCH 64/92] add thin wrapper to accomodate for input conventions, add comments --- corrlib/input/openQCD.py | 62 +++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 9c5fbbc..1d36e7f 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -307,27 +307,42 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A return t1_dict -def read_par_file(fname: str) -> dict[str, dict[str, Any]]: +def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines written here have names according to the openQCD programs and functions that write out the data. + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + Unpack the lattice parameters written by write_lat_parms. 
+ """ lat_pars = {} - t = fp.read(16) lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends t = fp.read(8) - nk, isw = struct.unpack('ii', t) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter lat_pars["nk"] = nk lat_pars["isw"] = isw t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] + lat_pars["beta"] = struct.unpack('d', t)[0] # beta t = fp.read(8) lat_pars["c0"] = struct.unpack('d', t)[0] t = fp.read(8) lat_pars["c1"] = struct.unpack('d', t)[0] t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor kappas = [] m0s = [] + # read kappas for ik in range(nk): t = fp.read(8) kappas.append(struct.unpack('d', t)[0]) @@ -338,14 +353,17 @@ def read_par_file(fname: str) -> dict[str, dict[str, Any]]: return lat_pars def _qcd2_write_bc_parms() -> dict[str, Any]: - bc_pars = {} + """ + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries t = fp.read(104) bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) - bc_pars["cF"] = list(bc_parms[2:4]) - phi = [[], []] + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] phi[0] = list(bc_parms[4:7]) phi[1] = list(bc_parms[7:10]) bc_pars["phi"] = phi @@ -360,3 +378,27 @@ def read_par_file(fname: str) -> dict[str, dict[str, Any]]: par_dict["lat"] = lat_par_dict par_dict["bc"] = bc_par_dict return par_dict + + +def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: + """ + Thin wrapper around read_qcd2_par_file, getting the file before reading. + + Parameters + ---------- + path: Path + Path of the corrlib repository. + project: str + UUID of the project of the parameter-file. 
+ file_in_project: str + The loaction of the file in the project directory. + + Returns + ------- + par_dict: dict + The dict with the parameters read from the .par-file. + """ + fname = path / "projects" / project / file_in_project + ds = os.path.join(path, "projects", project) + dl.get(fname, dataset=ds) + return read_qcd2_par_file(fname) From 71332264cf36ad27c9a9d840c7f14c86ba7835ce Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 10:47:19 +0200 Subject: [PATCH 65/92] restruct: give each openQCD prog it's own file --- corrlib/input/openQCD.py | 103 +++++++++---------------------- corrlib/pars/openQCD/__init__.py | 3 + corrlib/pars/openQCD/ms1.py | 81 ++++++++++++++++++++++++ corrlib/pars/openQCD/qcd2.py | 77 +++++++++++++++++++++++ 4 files changed, 189 insertions(+), 75 deletions(-) create mode 100644 corrlib/pars/openQCD/__init__.py create mode 100644 corrlib/pars/openQCD/ms1.py create mode 100644 corrlib/pars/openQCD/qcd2.py diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 1d36e7f..8a2b41e 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -4,7 +4,9 @@ import os import fnmatch from typing import Any, Optional from pathlib import Path -import struct +from ..pars.openQCD import ms1 +from ..pars.openQCD import qcd2 + def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: @@ -307,79 +309,6 @@ def extract_t1(path: Path, project: str, dir_in_project: str, param: dict[str, A return t1_dict -def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: - """ - The subroutines written here have names according to the openQCD programs and functions that write out the data. - - Parameters - ---------- - fname: Path - Location of the parameter file. - - Returns - ------- - par_dict: dict - Dictionary holding the parameters specified in the given file. - """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - Unpack the lattice parameters written by write_lat_parms. 
- """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - Unpack the boundary parameters written by write_bc_parms. - """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars - - with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() - fp.close() - par_dict = {} - par_dict["lat"] = lat_par_dict - par_dict["bc"] = bc_par_dict - return par_dict - - def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Thin wrapper around read_qcd2_par_file, getting the file before reading. 
@@ -401,4 +330,28 @@ def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, fname = path / "projects" / project / file_in_project ds = os.path.join(path, "projects", project) dl.get(fname, dataset=ds) - return read_qcd2_par_file(fname) + return qcd2.read_qcd2_par_file(fname) + + +def load_ms1_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: + """ + Thin wrapper around read_qcd2_ms1_par_file, getting the file before reading. + + Parameters + ---------- + path: Path + Path of the corrlib repository. + project: str + UUID of the project of the parameter-file. + file_in_project: str + The loaction of the file in the project directory. + + Returns + ------- + par_dict: dict + The dict with the parameters read from the .par-file. + """ + fname = path / "projects" / project / file_in_project + ds = os.path.join(path, "projects", project) + dl.get(fname, dataset=ds) + return ms1.read_qcd2_ms1_par_file(fname) diff --git a/corrlib/pars/openQCD/__init__.py b/corrlib/pars/openQCD/__init__.py new file mode 100644 index 0000000..edbac71 --- /dev/null +++ b/corrlib/pars/openQCD/__init__.py @@ -0,0 +1,3 @@ + +from . import ms1 as ms1 +from . import qcd2 as qcd2 diff --git a/corrlib/pars/openQCD/ms1.py b/corrlib/pars/openQCD/ms1.py new file mode 100644 index 0000000..9aabc54 --- /dev/null +++ b/corrlib/pars/openQCD/ms1.py @@ -0,0 +1,81 @@ +import struct + +from typing import Any +from pathlib import Path + + +def read_qcd2_ms1_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines written here have names according to the openQCD programs and functions that write out the data. + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ + + def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the lattice parameters written by write_lat_parms. 
+ """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict + + diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py new file mode 100644 index 0000000..9d63689 --- /dev/null +++ b/corrlib/pars/openQCD/qcd2.py @@ -0,0 +1,77 @@ +import struct + +from pathlib import Path +from typing import Any + + +def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: + """ + The subroutines 
written here have names according to the openQCD programs and functions that write out the data. + + Parameters + ---------- + fname: Path + Location of the parameter file. + + Returns + ------- + par_dict: dict + Dictionary holding the parameters specified in the given file. + """ + def _qcd2_write_lat_parms() -> dict[str, Any]: + """ + Unpack the lattice parameters written by write_lat_parms. + """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + def _qcd2_write_bc_parms() -> dict[str, Any]: + """ + Unpack the boundary parameters written by write_bc_parms. 
+ """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars + + with open(fname, "rb") as fp: + lat_par_dict = _qcd2_write_lat_parms() + bc_par_dict = _qcd2_write_bc_parms() + fp.close() + par_dict = {} + par_dict["lat"] = lat_par_dict + par_dict["bc"] = bc_par_dict + return par_dict From e654d7c1bb6b4948eb43037977142358c37dcd7b Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:10:54 +0200 Subject: [PATCH 66/92] restruct: introduce a file for flags --- corrlib/pars/openQCD/flags.py | 59 +++++++++++++++++++++++++++++++++++ corrlib/pars/openQCD/ms1.py | 57 ++------------------------------- corrlib/pars/openQCD/qcd2.py | 55 ++------------------------------ 3 files changed, 65 insertions(+), 106 deletions(-) create mode 100644 corrlib/pars/openQCD/flags.py diff --git a/corrlib/pars/openQCD/flags.py b/corrlib/pars/openQCD/flags.py new file mode 100644 index 0000000..2e4ab17 --- /dev/null +++ b/corrlib/pars/openQCD/flags.py @@ -0,0 +1,59 @@ +""" +Reconstruct the outputs of flags. +""" + +import struct +from typing import Any, BinaryIO + +# lat_parms.c +def lat_parms_write_lat_parms(fp: BinaryIO) -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the lattice parameters written by write_lat_parms. 
+ """ + lat_pars = {} + t = fp.read(16) + lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends + t = fp.read(8) + nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter + lat_pars["nk"] = nk + lat_pars["isw"] = isw + t = fp.read(8) + lat_pars["beta"] = struct.unpack('d', t)[0] # beta + t = fp.read(8) + lat_pars["c0"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["c1"] = struct.unpack('d', t)[0] + t = fp.read(8) + lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor + kappas = [] + m0s = [] + # read kappas + for ik in range(nk): + t = fp.read(8) + kappas.append(struct.unpack('d', t)[0]) + t = fp.read(8) + m0s.append(struct.unpack('d', t)[0]) + lat_pars["kappas"] = kappas + lat_pars["m0s"] = m0s + return lat_pars + + +def lat_parms_write_bc_parms(fp: BinaryIO) -> dict[str, Any]: + """ + NOTE: This is a duplcation from qcd2. + Unpack the boundary parameters written by write_bc_parms. + """ + bc_pars: dict[str, Any] = {} + t = fp.read(4) + bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries + t = fp.read(104) + bc_parms = struct.unpack('d'*13, t) + bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement + bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement + phi: list[list[float]] = [[], []] + phi[0] = list(bc_parms[4:7]) + phi[1] = list(bc_parms[7:10]) + bc_pars["phi"] = phi + bc_pars["theta"] = list(bc_parms[10:]) + return bc_pars \ No newline at end of file diff --git a/corrlib/pars/openQCD/ms1.py b/corrlib/pars/openQCD/ms1.py index 9aabc54..4c2aed5 100644 --- a/corrlib/pars/openQCD/ms1.py +++ b/corrlib/pars/openQCD/ms1.py @@ -1,4 +1,4 @@ -import struct +from . import flags from typing import Any from pathlib import Path @@ -18,60 +18,9 @@ def read_qcd2_ms1_par_file(fname: Path) -> dict[str, dict[str, Any]]: Dictionary holding the parameters specified in the given file. """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - NOTE: This is a duplcation from qcd2. 
- Unpack the lattice parameters written by write_lat_parms. - """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - NOTE: This is a duplcation from qcd2. - Unpack the boundary parameters written by write_bc_parms. - """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars - with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() + lat_par_dict = flags.lat_parms_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py index 9d63689..3b6e277 100644 --- a/corrlib/pars/openQCD/qcd2.py +++ b/corrlib/pars/openQCD/qcd2.py @@ -1,9 +1,8 @@ -import struct +from . 
import flags from pathlib import Path from typing import Any - def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ The subroutines written here have names according to the openQCD programs and functions that write out the data. @@ -18,58 +17,10 @@ def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: par_dict: dict Dictionary holding the parameters specified in the given file. """ - def _qcd2_write_lat_parms() -> dict[str, Any]: - """ - Unpack the lattice parameters written by write_lat_parms. - """ - lat_pars = {} - t = fp.read(16) - lat_pars["N"] = list(struct.unpack('iiii', t)) # lattice extends - t = fp.read(8) - nk, isw = struct.unpack('ii', t) # number of kappas and isw parameter - lat_pars["nk"] = nk - lat_pars["isw"] = isw - t = fp.read(8) - lat_pars["beta"] = struct.unpack('d', t)[0] # beta - t = fp.read(8) - lat_pars["c0"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["c1"] = struct.unpack('d', t)[0] - t = fp.read(8) - lat_pars["csw"] = struct.unpack('d', t)[0] # csw factor - kappas = [] - m0s = [] - # read kappas - for ik in range(nk): - t = fp.read(8) - kappas.append(struct.unpack('d', t)[0]) - t = fp.read(8) - m0s.append(struct.unpack('d', t)[0]) - lat_pars["kappas"] = kappas - lat_pars["m0s"] = m0s - return lat_pars - - def _qcd2_write_bc_parms() -> dict[str, Any]: - """ - Unpack the boundary parameters written by write_bc_parms. 
- """ - bc_pars: dict[str, Any] = {} - t = fp.read(4) - bc_pars["type"] = struct.unpack('i', t)[0] # type of hte boundaries - t = fp.read(104) - bc_parms = struct.unpack('d'*13, t) - bc_pars["cG"] = list(bc_parms[:2]) # boundary gauge field improvement - bc_pars["cF"] = list(bc_parms[2:4]) # boundary fermion field improvement - phi: list[list[float]] = [[], []] - phi[0] = list(bc_parms[4:7]) - phi[1] = list(bc_parms[7:10]) - bc_pars["phi"] = phi - bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars with open(fname, "rb") as fp: - lat_par_dict = _qcd2_write_lat_parms() - bc_par_dict = _qcd2_write_bc_parms() + lat_par_dict = flags.lat_parms_qcd2_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_qcd2_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict From 9498c1dd735ea498207eedafe4cb37f458c008c2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:11:48 +0200 Subject: [PATCH 67/92] correct function names --- corrlib/pars/openQCD/qcd2.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corrlib/pars/openQCD/qcd2.py b/corrlib/pars/openQCD/qcd2.py index 3b6e277..e73c156 100644 --- a/corrlib/pars/openQCD/qcd2.py +++ b/corrlib/pars/openQCD/qcd2.py @@ -3,6 +3,7 @@ from . import flags from pathlib import Path from typing import Any + def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ The subroutines written here have names according to the openQCD programs and functions that write out the data. 
@@ -19,8 +20,8 @@ def read_qcd2_par_file(fname: Path) -> dict[str, dict[str, Any]]: """ with open(fname, "rb") as fp: - lat_par_dict = flags.lat_parms_qcd2_write_lat_parms(fp) - bc_par_dict = flags.lat_parms_qcd2_write_bc_parms(fp) + lat_par_dict = flags.lat_parms_write_lat_parms(fp) + bc_par_dict = flags.lat_parms_write_bc_parms(fp) fp.close() par_dict = {} par_dict["lat"] = lat_par_dict From 8394b1fdbdaeaf0e647b7e568c9338f0a4a3a166 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:23:28 +0200 Subject: [PATCH 68/92] rename functions, let write_measurement decide which file type is given --- corrlib/input/openQCD.py | 6 +++--- corrlib/meas_io.py | 7 +++++-- corrlib/toml.py | 10 +++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/corrlib/input/openQCD.py b/corrlib/input/openQCD.py index 8a2b41e..879b555 100644 --- a/corrlib/input/openQCD.py +++ b/corrlib/input/openQCD.py @@ -9,7 +9,7 @@ from ..pars.openQCD import qcd2 -def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms1_infile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms1 measurements from a parameter file in the project. @@ -73,7 +73,7 @@ def read_ms1_param(path: Path, project: str, file_in_project: str) -> dict[str, return param -def read_ms3_param(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms3_infile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Read the parameters for ms3 measurements from a parameter file in the project. @@ -333,7 +333,7 @@ def load_qcd2_pars(path: Path, project: str, file_in_project: str) -> dict[str, return qcd2.read_qcd2_par_file(fname) -def load_ms1_pars(path: Path, project: str, file_in_project: str) -> dict[str, Any]: +def load_ms1_parfile(path: Path, project: str, file_in_project: str) -> dict[str, Any]: """ Thin wrapper around read_qcd2_ms1_par_file, getting the file before reading. 
diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 0f9ac02..a48f546 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -74,7 +74,10 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str ms_type = list(measurement.keys())[0] if ms_type == 'ms1': if parameter_file is not None: - parameters = openQCD.read_ms1_param(path, uuid, parameter_file) + if parameter_file.endswith(".ms1.in"): + parameters = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + parameters = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: raise Exception("Need parameter file for this code!") pars = {} @@ -88,7 +91,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str pars[subkey] = json.dumps(parameters["rw_fcts"][i]) elif ms_type in ['t0', 't1']: if parameter_file is not None: - parameters = openQCD.read_ms3_param(path, uuid, parameter_file) + parameters = openQCD.load_ms3_infile(path, uuid, parameter_file) else: parameters = {} for rwp in ["integrator", "eps", "ntot", "dnms"]: diff --git a/corrlib/toml.py b/corrlib/toml.py index add3739..93ba0f3 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -192,12 +192,16 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': - param = openQCD.read_ms1_param(path, uuid, md['param_file']) + parameter_file = md['param_file'] + if parameter_file.endswith(".ms1.in"): + param = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + param = openQCD.load_ms1_parfile(path, uuid, parameter_file) param['type'] = 'ms1' measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files']) elif md['measurement'] == 't0': if 'param_file' in md: - param = openQCD.read_ms3_param(path, uuid, md['param_file']) + param = 
openQCD.load_ms3_infile(path, uuid, md['param_file']) else: param = {} for rwp in ["integrator", "eps", "ntot", "dnms"]: @@ -207,7 +211,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) elif md['measurement'] == 't1': if 'param_file' in md: - param = openQCD.read_ms3_param(path, uuid, md['param_file']) + param = openQCD.load_ms3_infile(path, uuid, md['param_file']) param['type'] = 't1' measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) From 26607632328e4615fce343f7d700baddaad9fdb2 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 11:24:25 +0200 Subject: [PATCH 69/92] lint --- corrlib/pars/openQCD/flags.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/pars/openQCD/flags.py b/corrlib/pars/openQCD/flags.py index 2e4ab17..95be919 100644 --- a/corrlib/pars/openQCD/flags.py +++ b/corrlib/pars/openQCD/flags.py @@ -56,4 +56,4 @@ def lat_parms_write_bc_parms(fp: BinaryIO) -> dict[str, Any]: phi[1] = list(bc_parms[7:10]) bc_pars["phi"] = phi bc_pars["theta"] = list(bc_parms[10:]) - return bc_pars \ No newline at end of file + return bc_pars From 3b6a8be0cc1d1ed1e64b014d7b88d96c2dcc446f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:12:38 +0200 Subject: [PATCH 70/92] TEMPFIX: allow ms1 to not have an in or par file --- corrlib/meas_io.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a48f546..82f2e7d 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -79,7 +79,19 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str 
elif parameter_file.endswith(".ms1.par"): parameters = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: - raise Exception("Need parameter file for this code!") + # Temporary solution + parameters = {} + parameters["rand"] = {} + for nrw in range(1): + if "nsrc" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["nsrc"] = 1 + if "mu" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["mu"] = "None" + if "np" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["np"] = "None" + if "irp" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["irp"] = "None" + # raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): From 9b628abc69f0ac44f5c2fd392be1da1c48a85d6c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:16:11 +0200 Subject: [PATCH 71/92] TEMPFIX: allow ms1 to not have an in or par file part 2 --- corrlib/toml.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index 93ba0f3..14ec058 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -192,11 +192,26 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: elif project['code'] == 'openQCD': if md['measurement'] == 'ms1': - parameter_file = md['param_file'] - if parameter_file.endswith(".ms1.in"): - param = openQCD.load_ms1_infile(path, uuid, parameter_file) - elif parameter_file.endswith(".ms1.par"): - param = openQCD.load_ms1_parfile(path, uuid, parameter_file) + if 'param_file' in md.keys(): + parameter_file = md['param_file'] + if parameter_file.endswith(".ms1.in"): + param = openQCD.load_ms1_infile(path, uuid, parameter_file) + elif parameter_file.endswith(".ms1.par"): + param = openQCD.load_ms1_parfile(path, uuid, parameter_file) + else: + # Temporary solution + parameters = {} + parameters["rand"] = {} + for nrw in range(1): + if "nsrc" not in parameters["rw_fcts"][nrw]: 
+ parameters["rw_fcts"][nrw]["nsrc"] = 1 + if "mu" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["mu"] = "None" + if "np" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["np"] = "None" + if "irp" not in parameters["rw_fcts"][nrw]: + parameters["rw_fcts"][nrw]["irp"] = "None" + param = parameters param['type'] = 'ms1' measurement = openQCD.read_rwms(path, uuid, md['path'], param, md["prefix"], version=md["version"], names=md['names'], files=md['files']) elif md['measurement'] == 't0': From 16dcca3f3dea23022e56e20e03b7ff9c14ed04bc Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:17:29 +0200 Subject: [PATCH 72/92] TEMPFIX: allow ms1 to not have an in or par file part 3 --- corrlib/toml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/corrlib/toml.py b/corrlib/toml.py index 14ec058..f0ba525 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,6 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: # Temporary solution parameters = {} parameters["rand"] = {} + parameters["rw_fcts"] = [] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 From e3be65beec6c7bbb4fef67098c51ebe502bde125 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 12:18:59 +0200 Subject: [PATCH 73/92] TEMPFIX: allow ms1 to not have an in or par file part 4 --- corrlib/meas_io.py | 2 +- corrlib/toml.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 82f2e7d..f4e8a83 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -82,6 +82,7 @@ def write_measurement(path: Path, ensemble: str, measurement: dict[str, dict[str # Temporary solution parameters = {} parameters["rand"] = {} + parameters["rw_fcts"] = [{}] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 @@ -91,7 +92,6 @@ def write_measurement(path: Path, ensemble: 
str, measurement: dict[str, dict[str parameters["rw_fcts"][nrw]["np"] = "None" if "irp" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["irp"] = "None" - # raise Exception("Need parameter file for this code!") pars = {} subkeys = [] for i in range(len(parameters["rw_fcts"])): diff --git a/corrlib/toml.py b/corrlib/toml.py index f0ba525..eb40d5a 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,7 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: # Temporary solution parameters = {} parameters["rand"] = {} - parameters["rw_fcts"] = [] + parameters["rw_fcts"] = [{}] for nrw in range(1): if "nsrc" not in parameters["rw_fcts"][nrw]: parameters["rw_fcts"][nrw]["nsrc"] = 1 From e95edcb0932815e352011da5a69f297a92bfedd1 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 9 Apr 2026 23:27:31 +0200 Subject: [PATCH 74/92] restruct for easier tests, test drop of sfcf params --- corrlib/find.py | 149 +++++++++++++++++++++++---------------------- tests/find_test.py | 93 +++++++++++++--------------- 2 files changed, 116 insertions(+), 126 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 660e4bf..9d07a1c 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -162,6 +162,78 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project return results +def _sfcf_drop(param, **kwargs): + if 'offset' in kwargs: + if kwargs.get('offset') != param['offset']: + return True + if 'quark_kappas' in kwargs: + kappas = kwargs['quark_kappas'] + if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): + return True + if 'quark_masses' in kwargs: + masses = kwargs['quark_masses'] + if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): + return True + if 'qk1' in kwargs: + quark_kappa1 = kwargs['qk1'] + if not isinstance(quark_kappa1, list): + if (not 
np.isclose(quark_kappa1, param['quarks'][0]['mass'])): + return True + else: + if len(quark_kappa1) == 2: + if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): + return True + else: + raise ValueError("quark_kappa1 has to have length 2") + if 'qk2' in kwargs: + quark_kappa2 = kwargs['qk2'] + if not isinstance(quark_kappa2, list): + if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])): + return True + else: + if len(quark_kappa2) == 2: + if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): + return True + else: + raise ValueError("quark_kappa2 has to have length 2") + if 'qm1' in kwargs: + quark_mass1 = kwargs['qm1'] + if not isinstance(quark_mass1, list): + if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): + return True + else: + if len(quark_mass1) == 2: + if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): + return True + else: + raise ValueError("quark_mass1 has to have length 2") + if 'qm2' in kwargs: + quark_mass2 = kwargs['qm2'] + if not isinstance(quark_mass2, list): + if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): + return True + else: + if len(quark_mass2) == 2: + if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): + return True + else: + raise ValueError("quark_mass2 has to have length 2") + if 'quark_thetas' in kwargs: + quark_thetas = kwargs['quark_thetas'] + if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): + return True + # careful, this is not save, when multiple contributions are present! 
+ if 'wf1' in kwargs: + wf1 = kwargs['wf1'] + if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): + return True + if 'wf2' in kwargs: + wf2 = kwargs['wf2'] + if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): + return True + return False + + def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: r""" Filter method for the Database entries holding SFCF calculations. @@ -191,84 +263,13 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame: results: pd.DataFrame The filtered DataFrame, only holding the records that fit to the parameters given. """ + drops = [] for ind in range(len(results)): result = results.iloc[ind] param = json.loads(result['parameters']) - if 'offset' in kwargs: - if kwargs.get('offset') != param['offset']: - drops.append(ind) - continue - if 'quark_kappas' in kwargs: - kappas = kwargs['quark_kappas'] - if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_masses' in kwargs: - masses = kwargs['quark_masses'] - if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - if 'qk1' in kwargs: - quark_kappa1 = kwargs['qk1'] - if not isinstance(quark_kappa1, list): - if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])): - drops.append(ind) - continue - else: - if len(quark_kappa1) == 2: - if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']): - drops.append(ind) - continue - if 'qk2' in kwargs: - quark_kappa2 = kwargs['qk2'] - if not isinstance(quark_kappa2, list): - if (not np.isclose(quark_kappa2, 
param['quarks'][1]['mass'])): - drops.append(ind) - continue - else: - if len(quark_kappa2) == 2: - if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']): - drops.append(ind) - continue - if 'qm1' in kwargs: - quark_mass1 = kwargs['qm1'] - if not isinstance(quark_mass1, list): - if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass1) == 2: - if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])): - drops.append(ind) - continue - if 'qm2' in kwargs: - quark_mass2 = kwargs['qm2'] - if not isinstance(quark_mass2, list): - if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))): - drops.append(ind) - continue - else: - if len(quark_mass2) == 2: - if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])): - drops.append(ind) - continue - if 'quark_thetas' in kwargs: - quark_thetas = kwargs['quark_thetas'] - if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']): - drops.append(ind) - continue - # careful, this is not save, when multiple contributions are present! 
- if 'wf1' in kwargs: - wf1 = kwargs['wf1'] - if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)): - drops.append(ind) - continue - if 'wf2' in kwargs: - wf2 = kwargs['wf2'] - if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)): - drops.append(ind) - continue + if _sfcf_drop(param, **kwargs): + drops.append(ind) return results.drop(drops) diff --git a/tests/find_test.py b/tests/find_test.py index 156e5fe..36d687e 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -211,62 +211,51 @@ def test_db_lookup(tmp_path: Path) -> None: conn.close() -def test_sfcf_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) +def test_sfcf_drop() -> None: + parameters0 = { + 'offset': 
[0,0,0], + 'quarks': [{'mass': 1, 'thetas': [0,0,0]}, {'mass': 2, 'thetas': [0,0,1]}], # m0s = -3.5, -3.75 + 'wf1': [[1, [0, 0]], [0.5, [1, 0]], [.75, [.5, .5]]], + 'wf2': [[1, [2, 1]], [2, [0.5, -0.5]], [.5, [.75, .72]]], + } - assert True + assert not find._sfcf_drop(parameters0, offset=[0,0,0]) + assert find._sfcf_drop(parameters0, offset=[1,0,0]) + + assert not find._sfcf_drop(parameters0, quark_kappas = [1, 2]) + assert find._sfcf_drop(parameters0, quark_kappas = [-3.1, -3.72]) + + assert not find._sfcf_drop(parameters0, quark_masses = [-3.5, -3.75]) + assert find._sfcf_drop(parameters0, quark_masses = [-3.1, -3.72]) + + assert not find._sfcf_drop(parameters0, qk1 = 1) + assert not find._sfcf_drop(parameters0, qk2 = 2) + assert find._sfcf_drop(parameters0, qk1 = 2) + assert find._sfcf_drop(parameters0, qk2 = 1) + + assert not find._sfcf_drop(parameters0, qk1 = [0.5,1.5]) + assert not find._sfcf_drop(parameters0, qk2 = [1.5,2.5]) + assert find._sfcf_drop(parameters0, qk1 = 2) + assert find._sfcf_drop(parameters0, qk2 = 1) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qk1 = [0.5,1,5]) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qk2 = [1,5,2.5]) + + assert find._sfcf_drop(parameters0, qm1 = 1.2) + assert find._sfcf_drop(parameters0, qm2 = 2.2) + assert not find._sfcf_drop(parameters0, qm1 = -3.5) + assert not find._sfcf_drop(parameters0, qm2 = -3.75) + + assert find._sfcf_drop(parameters0, qm2 = 1.2) + assert find._sfcf_drop(parameters0, qm1 = 2.2) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qm1 = [0.5,1,5]) + with pytest.raises(ValueError): + assert not find._sfcf_drop(parameters0, qm2 = [1,5,2.5]) def test_openQCD_filter() -> None: - record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_1 = ["f_A", 
"ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] - data = [ - record_0, - record_1, - record_2, - record_3, - ] - cols = ["name", - "ensemble", - "code", - "path", - "project", - "parameters", - "parameter_file", - "created_at", - "updated_at"] - df = pd.DataFrame(data,columns=cols) - - find.openQCD_filter(df) assert True From 6e886aa06d638a9af4b32f6351d0534e65f70c99 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 09:16:46 +0200 Subject: [PATCH 75/92] add counter for measurements in tomls --- corrlib/toml.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index eb40d5a..e2e257d 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -178,8 +178,10 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: update_aliases(path, uuid, aliases) else: uuid = import_project(path, project['url'], aliases=aliases) + imeas = 1 + nmeas = len(measurements.keys()) for mname, md in measurements.items(): - print("Import measurement: " + mname) + print(f"Import measurement {imeas}/{nmeas}: {mname}") ensemble = md['ensemble'] if project['code'] == 'sfcf': param = sfcf.read_param(path, uuid, md['param_file']) @@ -232,6 +234,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: measurement = openQCD.extract_t1(path, uuid, md['path'], param, str(md["prefix"]), int(md["dtr_read"]), int(md["xmin"]), 
int(md["spatial_extent"]), fit_range=int(md.get('fit_range', 5)), postfix=str(md.get('postfix', '')), names=md.get('names', []), files=md.get('files', [])) write_measurement(path, ensemble, measurement, uuid, project['code'], (md['param_file'] if 'param_file' in md else None)) + imeas += 1 print(mname + " imported.") if not os.path.exists(path / "toml_imports" / uuid): From 74d99f8d5f4a9f24339bf04cf9bdac1e37a3d6d8 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 09:38:31 +0200 Subject: [PATCH 76/92] fix mypy issue --- corrlib/toml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/toml.py b/corrlib/toml.py index e2e257d..0d4dfc8 100644 --- a/corrlib/toml.py +++ b/corrlib/toml.py @@ -202,7 +202,7 @@ def import_toml(path: Path, file: str, copy_file: bool=True) -> None: param = openQCD.load_ms1_parfile(path, uuid, parameter_file) else: # Temporary solution - parameters = {} + parameters: dict[str, Any] = {} parameters["rand"] = {} parameters["rw_fcts"] = [{}] for nrw in range(1): From 6d1f8f7f1baa7efc26b0964af041af97347cb491 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 10 Apr 2026 10:28:28 +0200 Subject: [PATCH 77/92] add NotImplemented warning for openQCD filter --- corrlib/find.py | 5 ++++- tests/find_test.py | 28 +++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 9d07a1c..1c985e2 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -11,6 +11,7 @@ from typing import Any, Optional, Union from pathlib import Path import datetime as dt from collections.abc import Callable +import warnings def _project_lookup_by_alias(db: Path, alias: str) -> str: @@ -162,7 +163,7 @@ def _db_lookup(db: Path, ensemble: str, correlator_name: str, code: str, project return results -def _sfcf_drop(param, **kwargs): +def _sfcf_drop(param: dict[str, Any], **kwargs: Any) -> bool: if 'offset' in kwargs: if kwargs.get('offset') != param['offset']: return 
True @@ -288,6 +289,8 @@ def openQCD_filter(results:pd.DataFrame, **kwargs: Any) -> pd.DataFrame: The filtered results. """ + warnings.warn("A filter for openQCD parameters is not implemented yet.", Warning) + return results diff --git a/tests/find_test.py b/tests/find_test.py index 36d687e..f512f15 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -256,7 +256,33 @@ def test_sfcf_drop() -> None: def test_openQCD_filter() -> None: - assert True + record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] + data = [ + record_0, + record_1, + record_2, + record_3, + ] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.warns(Warning): + find.openQCD_filter(df, a = "asdf") def test_code_filter() -> None: From 91938c3c5a3f590ad48d471e0a19a8702ba94349 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 14:17:41 +0200 Subject: [PATCH 78/92] add second time integrity check --- corrlib/integrity.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 
bf890db..f1459d0 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -2,4 +2,9 @@ import datetime as dt def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: - return not (created_at > updated_at) + # we expect created_at <= updated_at <= now + if created_at > updated_at: + return False + if updated_at > dt.datetime.now(): + return False + return True From 0b8c041ee559af903d6aa1526ed1a59753ab775d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 15:34:05 +0200 Subject: [PATCH 79/92] add wrapper functions to check for the validity of the database --- corrlib/integrity.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index f1459d0..db242f6 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -1,10 +1,32 @@ import datetime as dt +from pathlib import Path +from .tools import get_db_file +import pandas as pd +import sqlite3 -def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: +def has_valid_times(result: pd.DataFrame) -> bool: # we expect created_at <= updated_at <= now + created_at = dt.datetime.fromisoformat(result['created_at']) + updated_at = dt.datetime.fromisoformat(result['updated_at']) if created_at > updated_at: return False if updated_at > dt.datetime.now(): return False return True + + +def check_db_integrity(path: Path) -> None: + db = get_db_file(path) + search_expr = "SELECT * FROM 'backlogs'" + conn = sqlite3.connect(db) + results = pd.read_sql(search_expr, conn) + + for result in results: + if not has_valid_times(result): + raise ValueError(f"Result with id {result[id]} has wrong time signatures.") + + +def full_integrity_check(path: Path) -> None: + check_db_integrity(path) + From 65cd55ec0a8d2afbe5a54159cff393d80da466bd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 16:36:31 +0200 Subject: [PATCH 80/92] add test on whether paths are indeed 
unique --- corrlib/integrity.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index db242f6..70e4694 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -15,6 +15,14 @@ def has_valid_times(result: pd.DataFrame) -> bool: return False return True +def are_keys_unique(db: Path, table: str, col: str) -> bool: + conn = sqlite3.connect(db) + c = conn.cursor() + c.execute(f"SELECT COUNT( DISTINCT CAST(path AS nvarchar(4000))), COUNT({col}) FROM {table};") + results = c.fetchall()[0] + conn.close() + return bool(results[0] == results[1]) + def check_db_integrity(path: Path) -> None: db = get_db_file(path) @@ -27,6 +35,7 @@ def check_db_integrity(path: Path) -> None: raise ValueError(f"Result with id {result[id]} has wrong time signatures.") + def full_integrity_check(path: Path) -> None: check_db_integrity(path) From 85698c377bca7405d69c63d13d3ef918d35aaf1a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 14 Apr 2026 16:42:39 +0200 Subject: [PATCH 81/92] use uniqueness for complete db check --- corrlib/integrity.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 70e4694..8a414bf 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -26,6 +26,10 @@ def are_keys_unique(db: Path, table: str, col: str) -> bool: def check_db_integrity(path: Path) -> None: db = get_db_file(path) + + if not are_keys_unique(db, 'backlogs', 'path'): + raise Exception("The paths the backlog table of the database links are not unique.") + search_expr = "SELECT * FROM 'backlogs'" conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) From d8bb9e4080017070bc928d19fadd7e175abeebf0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 10:49:03 +0200 Subject: [PATCH 82/92] fix import --- corrlib/find.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 1c985e2..7b07321 
100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,8 +6,8 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get -from .integrity import check_time_validity -from typing import Any, Optional, Union +from .integrity import has_valid_times +from typing import Any, Optional from pathlib import Path import datetime as dt from collections.abc import Callable @@ -88,7 +88,7 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) - db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) + db_times_valid = has_valid_times(result) if not db_times_valid: raise ValueError('Time stamps not valid for result with path', result["path"]) From dc424c3e18ecdeeda834865b05dc9bfac6e41e5a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 11:24:25 +0200 Subject: [PATCH 83/92] fix time tests --- tests/find_test.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tests/find_test.py b/tests/find_test.py index f512f15..cc455f9 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -5,6 +5,7 @@ import corrlib.initialization as cinit import pytest import pandas as pd import datalad.api as dl +import datetime as dt def make_sql(path: Path) -> Path: @@ -57,18 +58,20 @@ def test_find_lookup_by_id(tmp_path: Path) -> None: def test_time_filter() -> None: - record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf0", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created - record_B = 
["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf1", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated - record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", - '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later - record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-04-14 12:55:18.229966'] # created and updated later + record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf3", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] - record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf4", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier - record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf5", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2024-03-26 
12:55:18.229966'] # this is invalid... + record_G = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf2", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', str(dt.datetime.now() + dt.timedelta(days=2, hours=3, minutes=5, seconds=30))] # created and updated later data = [record_A, record_B, record_C, record_D, record_E] cols = ["name", @@ -141,6 +144,21 @@ def test_time_filter() -> None: with pytest.raises(ValueError): results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + data = [record_A, record_B, record_C, record_D, record_G] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.raises(ValueError): + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + def test_db_lookup(tmp_path: Path) -> None: db = make_sql(tmp_path) From b625bf92438ba3fcae0729bddd57554f68275fdd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Wed, 15 Apr 2026 12:02:03 +0200 Subject: [PATCH 84/92] proper row interation --- corrlib/integrity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 8a414bf..d865944 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -5,7 +5,7 @@ import pandas as pd import sqlite3 -def has_valid_times(result: pd.DataFrame) -> bool: +def has_valid_times(result: pd.Series) -> bool: # we expect created_at <= updated_at <= now created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = dt.datetime.fromisoformat(result['updated_at']) @@ -34,7 +34,7 @@ def check_db_integrity(path: Path) -> None: conn = sqlite3.connect(db) results = pd.read_sql(search_expr, conn) - for result in results: + for _, result in results.iterrows(): if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time 
signatures.") From 0b1ff3cbad724456e6c77a5a52dd521bb6ac5ffe Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 16:24:31 +0200 Subject: [PATCH 85/92] prepare implementation --- corrlib/cli.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/corrlib/cli.py b/corrlib/cli.py index 6c1c3c5..2d1a9ee 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -1,6 +1,7 @@ from typing import Optional import typer from corrlib import __app_name__ + from .initialization import create from .toml import import_tomls, update_project, reimport_project from .find import find_record, list_projects @@ -8,6 +9,8 @@ from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record +# from .integrity import + import os from pyerrors import Corr from importlib.metadata import version @@ -137,6 +140,23 @@ def stat( return +@app.command() +def check(path: Path = typer.Option( + Path('./corrlib'), + "--dataset", + "-d", + ), + files: str = typer.Argument( + ), + copy_file: bool = typer.Option( + bool(True), + "--save", + "-s", + ),) -> None: + + "✅" : "❌" + + @app.command() def importer( path: Path = typer.Option( From 23b5d066f7b4e4733629b07786fd1ffbb117efcd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 16:34:30 +0200 Subject: [PATCH 86/92] make integrity checks accassible from cli --- corrlib/cli.py | 13 +++---------- corrlib/integrity.py | 8 +++++--- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/corrlib/cli.py b/corrlib/cli.py index 2d1a9ee..d24d8ef 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -9,7 +9,7 @@ from .tools import str2list from .main import update_aliases from .meas_io import drop_cache as mio_drop_cache from .meas_io import load_record as mio_load_record -# from .integrity import +from .integrity import full_integrity_check import os from pyerrors import Corr @@ -146,15 +146,8 @@ def check(path: 
Path = typer.Option( "--dataset", "-d", ), - files: str = typer.Argument( - ), - copy_file: bool = typer.Option( - bool(True), - "--save", - "-s", - ),) -> None: - - "✅" : "❌" + ) -> None: + full_integrity_check(path) @app.command() diff --git a/corrlib/integrity.py b/corrlib/integrity.py index d865944..dc1216c 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -27,19 +27,21 @@ def are_keys_unique(db: Path, table: str, col: str) -> bool: def check_db_integrity(path: Path) -> None: db = get_db_file(path) - if not are_keys_unique(db, 'backlogs', 'path'): + if not are_keys_unique(path / db, 'backlogs', 'path'): raise Exception("The paths the backlog table of the database links are not unique.") search_expr = "SELECT * FROM 'backlogs'" - conn = sqlite3.connect(db) + conn = sqlite3.connect(path / db) results = pd.read_sql(search_expr, conn) for _, result in results.iterrows(): if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time signatures.") - + print("DB:\t✅") def full_integrity_check(path: Path) -> None: check_db_integrity(path) + print("Full:\t✅") + From b13136a248f294eb2275da54747cc105473655f3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:32:22 +0200 Subject: [PATCH 87/92] add check for links to files --- corrlib/integrity.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index dc1216c..63572a9 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -3,6 +3,8 @@ from pathlib import Path from .tools import get_db_file import pandas as pd import sqlite3 +from .tracker import get +import pyerrors.input.json as pj def has_valid_times(result: pd.Series) -> bool: @@ -38,10 +40,45 @@ def check_db_integrity(path: Path) -> None: if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time signatures.") print("DB:\t✅") + return + + +def _check_db2paths(path: Path, 
meas_paths: list[str]) -> None: + needed_data: dict[str, list[str]] = {} + for mpath in meas_paths: + file = mpath.split("::")[0] + if file not in needed_data.keys(): + needed_data[file] = [] + key = mpath.split("::")[1] + needed_data[file].append(key) + + for file in needed_data.keys(): + get(path, Path(file)) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) + if not set(filedict.keys()).issubset(needed_data[file]): + for key in filedict.keys(): + if key not in needed_data[file]: + raise ValueError(f"Found unintended key {key} in file {file}.") + elif not set(needed_data[file]).issubset(filedict.keys()): + for key in needed_data[file]: + if key not in filedict.keys(): + raise ValueError(f"Did not find data for key {key} that should be in file {file}.") + print("Links:\t✅") + return + + +def check_db_file_links(path: Path) -> None: + db = get_db_file(path) + search_expr = "SELECT path FROM 'backlogs'" + conn = sqlite3.connect(path / db) + results = pd.read_sql(search_expr, conn)['path'].values + print(results) + _check_db2paths(path, results) def full_integrity_check(path: Path) -> None: check_db_integrity(path) + check_db_file_links(path) print("Full:\t✅") From 29ebafc1c44e5e0fe30dcb38fdf22d408f746cec Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:34:53 +0200 Subject: [PATCH 88/92] show progress a little --- corrlib/integrity.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 63572a9..8722840 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -52,7 +52,9 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None: key = mpath.split("::")[1] needed_data[file].append(key) - for file in needed_data.keys(): + totf = len(needed_data.keys()) + for i, file in enumerate(needed_data.keys()): + print(f"Check against file {i}/{totf}: {file}") get(path, Path(file)) filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) if not 
set(filedict.keys()).issubset(needed_data[file]): From 37ae8185897b62017f5822ec0b727685a905a389 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:37:46 +0200 Subject: [PATCH 89/92] small logic issue --- corrlib/integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 8722840..23fbe0e 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -61,7 +61,7 @@ def _check_db2paths(path: Path, meas_paths: list[str]) -> None: for key in filedict.keys(): if key not in needed_data[file]: raise ValueError(f"Found unintended key {key} in file {file}.") - elif not set(needed_data[file]).issubset(filedict.keys()): + if not set(needed_data[file]).issubset(filedict.keys()): for key in needed_data[file]: if key not in filedict.keys(): raise ValueError(f"Did not find data for key {key} that should be in file {file}.") From 0535e19bf08ebced63834ff5796591a815ae20f0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:42:47 +0200 Subject: [PATCH 90/92] fix typing --- corrlib/integrity.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/corrlib/integrity.py b/corrlib/integrity.py index 23fbe0e..5f80aa3 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -6,6 +6,8 @@ import sqlite3 from .tracker import get import pyerrors.input.json as pj +from typing import Any + def has_valid_times(result: pd.Series) -> bool: # we expect created_at <= updated_at <= now @@ -74,8 +76,7 @@ def check_db_file_links(path: Path) -> None: search_expr = "SELECT path FROM 'backlogs'" conn = sqlite3.connect(path / db) results = pd.read_sql(search_expr, conn)['path'].values - print(results) - _check_db2paths(path, results) + _check_db2paths(path, list(results)) def full_integrity_check(path: Path) -> None: From 083d7ee3ce6ec2868a2da1b9c4fd73ef22362f50 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:52:18 +0200 Subject: [PATCH 91/92] add 
dry run for loading data using the integrity functions --- corrlib/meas_io.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index f4e8a83..731da66 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -11,6 +11,7 @@ from .tracker import get, save, unlock import shutil from typing import Any from pathlib import Path +from .integrity import _check_db2paths CACHE_DIR = ".cache" @@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]: return load_records(path, [meas_path])[0] -def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: +def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. @@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = Path of the correlator library. meas_paths: list[str] A list of the paths to the correlator in the backlog system. - perloaded: dict[str, Any] - The data that is already prelaoded. Of interest if data has alread been loaded in the same script. + preloaded: dict[str, Any] + The data that is already preloaded. Of interest if data has alread been loaded in the same script. + dry_run: bool + Do not load datda, just check whether we can reach the data we are interested in. Returns ------- - retruned_data: list + returned_data: list The loaded records. 
""" + if dry_run: + _check_db2paths(path, meas_paths) + return needed_data: dict[str, list[str]] = {} for mpath in meas_paths: file = mpath.split("::")[0] From 51ae53aa024365f47436c11bf69bc376184ac6b4 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 17 Apr 2026 17:53:13 +0200 Subject: [PATCH 92/92] add empty return --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 731da66..cbd9386 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -176,7 +176,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = """ if dry_run: _check_db2paths(path, meas_paths) - return + return [] needed_data: dict[str, list[str]] = {} for mpath in meas_paths: file = mpath.split("::")[0]