Compare commits

...

5 commits

Author SHA1 Message Date
cc14e68b44
add tests for time filter and find project, add a first check for integrity of the database
Some checks failed
Mypy / mypy (push) Successful in 1m15s
Pytest / pytest (3.12) (push) Successful in 1m20s
Pytest / pytest (3.13) (push) Successful in 1m16s
Pytest / pytest (3.14) (push) Successful in 1m25s
Ruff / ruff (push) Has been cancelled
2026-03-26 17:19:58 +01:00
a90b992326
Merge branch 'develop' into tests/find 2026-03-26 12:43:47 +01:00
38b4983fed
HOTFIX: hand over path as str 2
All checks were successful
Mypy / mypy (push) Successful in 1m14s
Pytest / pytest (3.13) (push) Successful in 1m13s
Pytest / pytest (3.14) (push) Successful in 1m19s
Pytest / pytest (3.12) (push) Successful in 1m18s
Ruff / ruff (push) Successful in 1m3s
2026-03-24 18:50:30 +01:00
b8121811f9
HOTFIX: hand over path as str
Some checks are pending
Mypy / mypy (push) Waiting to run
Pytest / pytest (3.12) (push) Waiting to run
Pytest / pytest (3.13) (push) Waiting to run
Pytest / pytest (3.14) (push) Waiting to run
Ruff / ruff (push) Waiting to run
2026-03-24 18:49:04 +01:00
2fd46d452b
hotfix ensure that path is a Path
All checks were successful
Mypy / mypy (push) Successful in 1m13s
Pytest / pytest (3.12) (push) Successful in 1m19s
Pytest / pytest (3.13) (push) Successful in 1m14s
Pytest / pytest (3.14) (push) Successful in 1m18s
Ruff / ruff (push) Successful in 1m3s
2026-03-24 18:40:46 +01:00
4 changed files with 128 additions and 3 deletions

View file

@ -6,6 +6,7 @@ import numpy as np
from .input.implementations import codes
from .tools import k2m, get_db_file
from .tracker import get
from .integrity import check_time_validity
from typing import Any, Optional, Union
from pathlib import Path
import datetime as dt
@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
result = results.iloc[ind]
created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at'])
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
if not db_times_valid:
raise ValueError('Time stamps not valid for result with path', result["path"])
if created_before is not None:
date_created_before = dt.datetime.fromisoformat(created_before)

5
corrlib/integrity.py Normal file
View file

@ -0,0 +1,5 @@
import datetime as dt
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
return not (created_at > updated_at)

View file

@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
if cache_enabled(path):
if not os.path.exists(cache_dir(path, file)):
os.makedirs(cache_dir(path, file))
dump_object(preloaded[file][key], cache_path(path, file, key))
dump_object(preloaded[file][key], str(cache_path(path, file, key)))
return returned_data
@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path:
The path holding the cached data for the given file.
"""
cache_path_list = file.split("/")[1:]
cache_path = path / CACHE_DIR
cache_path = Path(path) / CACHE_DIR
for directory in cache_path_list:
cache_path /= directory
return cache_path
@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]:
The data read from the file.
"""
get(path, file)
filedict: dict[str, Any] = pj.load_json_dict(path / file)
filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
print("> read file")
return filedict

View file

@ -3,6 +3,8 @@ import sqlite3
from pathlib import Path
import corrlib.initialization as cinit
import pytest
import pandas as pd
import datalad.api as dl
def make_sql(path: Path) -> Path:
@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
conn.close()
def test_find_project(tmp_path: Path) -> None:
cinit.create(tmp_path)
db = tmp_path / "backlogger.db"
dl.unlock(str(db), dataset=str(tmp_path))
conn = sqlite3.connect(db)
c = conn.cursor()
uuid = "test_uuid"
alias_str = "fun_project"
tag_str = "tt"
owner = "tester"
code = "test_code"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
assert uuid == find.find_project(tmp_path, "fun_project")
uuid = "test_uuid2"
alias_str = "fun_project"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
with pytest.raises(Exception):
assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project")
conn.close()
def test_find_lookup_by_id(tmp_path: Path) -> None:
db = make_sql(tmp_path)
conn = sqlite3.connect(db)
@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None:
assert len(results) == 1
conn.close()
def test_time_filter() -> None:
record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created
record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated
record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later
record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966']
record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier
record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid...
data = [record_A, record_B, record_C, record_D, record_E]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
data = [record_A, record_B, record_C, record_D, record_F]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
with pytest.raises(ValueError):
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')