From 2fd46d452b84cf326d19d0a814a465631b5c8241 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:40:46 +0100 Subject: [PATCH 1/4] hotfix ensure that path is a Path --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index be80b6f..de19727 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path: The path holding the cached data for the given file. """ cache_path_list = file.split("/")[1:] - cache_path = path / CACHE_DIR + cache_path = Path(path) / CACHE_DIR for directory in cache_path_list: cache_path /= directory return cache_path From b8121811f967530f174202d5c67e72a0132295de Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:49:04 +0100 Subject: [PATCH 2/4] HOTFIX: hand over path as str --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index de19727..a87e227 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]: The data read from the file. 
""" get(path, file) - filedict: dict[str, Any] = pj.load_json_dict(path / file) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) print("> read file") return filedict From 38b4983fed0af22231b27b57dc32a4efb121a63a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 24 Mar 2026 18:50:30 +0100 Subject: [PATCH 3/4] HOTFIX: hand over path as str 2 --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index a87e227..48017a1 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = if cache_enabled(path): if not os.path.exists(cache_dir(path, file)): os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], cache_path(path, file, key)) + dump_object(preloaded[file][key], str(cache_path(path, file, key))) return returned_data From cc14e68b4429a122ee0c9b299555f4e7ca8fef45 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 26 Mar 2026 17:19:58 +0100 Subject: [PATCH 4/4] add tests for time filter and find project, add a first check for integrity of the database --- corrlib/find.py | 4 ++ corrlib/integrity.py | 5 ++ tests/find_test.py | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 corrlib/integrity.py diff --git a/corrlib/find.py b/corrlib/find.py index dd3a9a6..3cbe09b 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -6,6 +6,7 @@ import numpy as np from .input.implementations import codes from .tools import k2m, get_db_file from .tracker import get +from .integrity import check_time_validity from typing import Any, Optional, Union from pathlib import Path import datetime as dt @@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre result = results.iloc[ind] created_at = dt.datetime.fromisoformat(result['created_at']) updated_at = 
dt.datetime.fromisoformat(result['updated_at']) + db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at) + if not db_times_valid: + raise ValueError(f'Time stamps not valid for result with path {result["path"]}') if created_before is not None: date_created_before = dt.datetime.fromisoformat(created_before) diff --git a/corrlib/integrity.py b/corrlib/integrity.py new file mode 100644 index 0000000..bf890db --- /dev/null +++ b/corrlib/integrity.py @@ -0,0 +1,5 @@ +import datetime as dt + + +def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool: + return not (created_at > updated_at) diff --git a/tests/find_test.py b/tests/find_test.py index e895b85..573f87e 100644 --- a/tests/find_test.py +++ b/tests/find_test.py @@ -3,6 +3,8 @@ import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest +import pandas as pd +import datalad.api as dl def make_sql(path: Path) -> Path: @@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None: conn.close() +def test_find_project(tmp_path: Path) -> None: + cinit.create(tmp_path) + db = tmp_path / "backlogger.db" + dl.unlock(str(db), dataset=str(tmp_path)) + conn = sqlite3.connect(db) + c = conn.cursor() + uuid = "test_uuid" + alias_str = "fun_project" + tag_str = "tt" + owner = "tester" + code = "test_code" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + assert uuid == find.find_project(tmp_path, "fun_project") + + uuid = "test_uuid2" + alias_str = "fun_project" + c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + (uuid, alias_str, tag_str, owner, code)) + conn.commit() + + with pytest.raises(Exception): + assert uuid == find._project_lookup_by_alias(tmp_path, 
"fun_project") + conn.close() + + def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) @@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None: assert len(results) == 1 conn.close() + + +def test_time_filter() -> None: + record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created + record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated + record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later + record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] + record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier + record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in", + '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... 
+ + data = [record_A, record_B, record_C, record_D, record_E] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') + assert results.empty + results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') + assert len(results) == 3 + results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') + assert len(results) == 1 + + results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') + assert len(results) == 5 + results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') + 
assert results.empty + results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') + assert len(results) == 2 + results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') + assert len(results) == 4 + + data = [record_A, record_B, record_C, record_D, record_F] + cols = ["name", + "ensemble", + "code", + "path", + "project", + "parameters", + "parameter_file", + "created_at", + "updated_at"] + df = pd.DataFrame(data,columns=cols) + + with pytest.raises(ValueError): + results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')