Compare commits

...

5 commits

Author SHA1 Message Date
cc14e68b44
add tests for time filter and find project, add a first check for integrity of the database
Some checks failed
Mypy / mypy (push) Successful in 1m15s
Pytest / pytest (3.12) (push) Successful in 1m20s
Pytest / pytest (3.13) (push) Successful in 1m16s
Pytest / pytest (3.14) (push) Successful in 1m25s
Ruff / ruff (push) Has been cancelled
2026-03-26 17:19:58 +01:00
a90b992326
Merge branch 'develop' into tests/find 2026-03-26 12:43:47 +01:00
38b4983fed
HOTFIX: hand over path as str 2
All checks were successful
Mypy / mypy (push) Successful in 1m14s
Pytest / pytest (3.13) (push) Successful in 1m13s
Pytest / pytest (3.14) (push) Successful in 1m19s
Pytest / pytest (3.12) (push) Successful in 1m18s
Ruff / ruff (push) Successful in 1m3s
2026-03-24 18:50:30 +01:00
b8121811f9
HOTFIX: hand over path as str
Some checks are pending
Mypy / mypy (push) Waiting to run
Pytest / pytest (3.12) (push) Waiting to run
Pytest / pytest (3.13) (push) Waiting to run
Pytest / pytest (3.14) (push) Waiting to run
Ruff / ruff (push) Waiting to run
2026-03-24 18:49:04 +01:00
2fd46d452b
hotfix ensure that path is a Path
All checks were successful
Mypy / mypy (push) Successful in 1m13s
Pytest / pytest (3.12) (push) Successful in 1m19s
Pytest / pytest (3.13) (push) Successful in 1m14s
Pytest / pytest (3.14) (push) Successful in 1m18s
Ruff / ruff (push) Successful in 1m3s
2026-03-24 18:40:46 +01:00
4 changed files with 128 additions and 3 deletions

View file

@ -6,6 +6,7 @@ import numpy as np
from .input.implementations import codes from .input.implementations import codes
from .tools import k2m, get_db_file from .tools import k2m, get_db_file
from .tracker import get from .tracker import get
from .integrity import check_time_validity
from typing import Any, Optional, Union from typing import Any, Optional, Union
from pathlib import Path from pathlib import Path
import datetime as dt import datetime as dt
@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
result = results.iloc[ind] result = results.iloc[ind]
created_at = dt.datetime.fromisoformat(result['created_at']) created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at']) updated_at = dt.datetime.fromisoformat(result['updated_at'])
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
if not db_times_valid:
raise ValueError('Time stamps not valid for result with path', result["path"])
if created_before is not None: if created_before is not None:
date_created_before = dt.datetime.fromisoformat(created_before) date_created_before = dt.datetime.fromisoformat(created_before)

5
corrlib/integrity.py Normal file
View file

@ -0,0 +1,5 @@
import datetime as dt
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
    """Tell whether a pair of record time stamps is consistent.

    Parameters
    ----------
    created_at: datetime.datetime
        Creation time stamp of the record.
    updated_at: datetime.datetime
        Last-update time stamp of the record.

    Returns
    -------
    bool
        True if the record was created at or before the moment it was last
        updated, False if the creation time lies after the update time.
    """
    # Equal time stamps are valid: they describe a record that was never updated.
    return created_at <= updated_at

View file

@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
if cache_enabled(path): if cache_enabled(path):
if not os.path.exists(cache_dir(path, file)): if not os.path.exists(cache_dir(path, file)):
os.makedirs(cache_dir(path, file)) os.makedirs(cache_dir(path, file))
dump_object(preloaded[file][key], cache_path(path, file, key)) dump_object(preloaded[file][key], str(cache_path(path, file, key)))
return returned_data return returned_data
@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path:
The path holding the cached data for the given file. The path holding the cached data for the given file.
""" """
cache_path_list = file.split("/")[1:] cache_path_list = file.split("/")[1:]
cache_path = path / CACHE_DIR cache_path = Path(path) / CACHE_DIR
for directory in cache_path_list: for directory in cache_path_list:
cache_path /= directory cache_path /= directory
return cache_path return cache_path
@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]:
The data read from the file. The data read from the file.
""" """
get(path, file) get(path, file)
filedict: dict[str, Any] = pj.load_json_dict(path / file) filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
print("> read file") print("> read file")
return filedict return filedict

View file

@ -3,6 +3,8 @@ import sqlite3
from pathlib import Path from pathlib import Path
import corrlib.initialization as cinit import corrlib.initialization as cinit
import pytest import pytest
import pandas as pd
import datalad.api as dl
def make_sql(path: Path) -> Path: def make_sql(path: Path) -> Path:
@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
conn.close() conn.close()
def test_find_project(tmp_path: Path) -> None:
    """Check project lookup by alias for a unique and for an ambiguous alias.

    A fresh library is created in ``tmp_path`` and one project with the alias
    ``fun_project`` is inserted; ``find.find_project`` must return its id.
    A second project with the same alias is then inserted, after which
    ``find._project_lookup_by_alias`` is expected to raise.
    """
    cinit.create(tmp_path)
    db = tmp_path / "backlogger.db"
    # The database file is unlocked through datalad before it is written to.
    dl.unlock(str(db), dataset=str(tmp_path))

    insert_project = (
        "INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) "
        "VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))"
    )
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()

        uuid = "test_uuid"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()
        assert uuid == find.find_project(tmp_path, "fun_project")

        # A second project sharing the alias makes the lookup ambiguous.
        uuid = "test_uuid2"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()
        # No assert inside the raises block: an AssertionError is itself an
        # Exception and would make the test pass even if the lookup silently
        # returned the wrong id instead of raising.
        # NOTE(review): narrow Exception to the concrete type raised by
        # _project_lookup_by_alias once that is confirmed.
        with pytest.raises(Exception):
            find._project_lookup_by_alias(tmp_path, "fun_project")
    finally:
        # Close the connection even when an assertion above fails.
        conn.close()
def test_find_lookup_by_id(tmp_path: Path) -> None: def test_find_lookup_by_id(tmp_path: Path) -> None:
db = make_sql(tmp_path) db = make_sql(tmp_path)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None:
assert len(results) == 1 assert len(results) == 1
conn.close() conn.close()
def test_time_filter() -> None:
    """Run ``find._time_filter`` over all four time bounds and over an invalid row.

    Five records with consistent time stamps are filtered with
    ``created_before``, ``created_after``, ``updated_before`` and
    ``updated_after`` at five cut-off dates each, and the number of surviving
    rows is compared with the expectation. Afterwards one record is swapped
    for a row whose creation time lies after its update time, for which a
    ``ValueError`` is expected.
    """
    columns = ["name",
               "ensemble",
               "code",
               "path",
               "project",
               "parameters",
               "parameter_file",
               "created_at",
               "updated_at"]
    # Every record shares these leading fields; only the two time stamps differ.
    shared_fields = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A",
                     '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in"]

    stamps_A = ('2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966')  # only created
    stamps_B = ('2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966')  # created and updated
    stamps_C = ('2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966')  # created and updated later
    stamps_D = ('2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966')
    stamps_E = ('2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # only created, earlier
    stamps_F = ('2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # this is invalid...

    def build_frame(stamp_pairs: list[tuple[str, str]]) -> pd.DataFrame:
        # One row per (created_at, updated_at) pair, appended to the shared fields.
        rows = [shared_fields + [created, updated] for created, updated in stamp_pairs]
        return pd.DataFrame(rows, columns=columns)

    cutoffs = ['2023-03-26 12:55:18.229966',
               '2027-03-26 12:55:18.229966',
               '2026-03-25 12:55:18.229966',
               '2026-03-26 12:55:18.229965',
               '2025-03-04 12:55:18.229965']
    # Expected row counts per cut-off, in the order of ``cutoffs``.
    counts_before = [0, 5, 3, 3, 1]
    counts_after = [5, 0, 2, 2, 4]
    expectations = [("created_before", counts_before),
                    ("created_after", counts_after),
                    ("updated_before", counts_before),
                    ("updated_after", counts_after)]

    df = build_frame([stamps_A, stamps_B, stamps_C, stamps_D, stamps_E])
    for keyword, expected_counts in expectations:
        for cutoff, expected in zip(cutoffs, expected_counts):
            results = find._time_filter(df, **{keyword: cutoff})
            if expected == 0:
                assert results.empty
            else:
                assert len(results) == expected

    # Replacing the last record by the inconsistent one must make the filter raise.
    df = build_frame([stamps_A, stamps_B, stamps_C, stamps_D, stamps_F])
    with pytest.raises(ValueError):
        find._time_filter(df, created_before='2023-03-26 12:55:18.229966')