add tests for time filter and find project, add a first check for integrity of the database
Some checks failed
Mypy / mypy (push) Successful in 1m15s
Pytest / pytest (3.12) (push) Successful in 1m20s
Pytest / pytest (3.13) (push) Successful in 1m16s
Pytest / pytest (3.14) (push) Successful in 1m25s
Ruff / ruff (push) Has been cancelled

This commit is contained in:
Justus Kuhlmann 2026-03-26 17:19:58 +01:00
commit cc14e68b44
Signed by: jkuhl
GPG key ID: 00ED992DD79B85A6
3 changed files with 125 additions and 0 deletions

View file

@ -6,6 +6,7 @@ import numpy as np
from .input.implementations import codes
from .tools import k2m, get_db_file
from .tracker import get
from .integrity import check_time_validity
from typing import Any, Optional, Union
from pathlib import Path
import datetime as dt
@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
result = results.iloc[ind]
created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at'])
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
if not db_times_valid:
raise ValueError('Time stamps not valid for result with path', result["path"])
if created_before is not None:
date_created_before = dt.datetime.fromisoformat(created_before)

5
corrlib/integrity.py Normal file
View file

@ -0,0 +1,5 @@
import datetime as dt
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
return not (created_at > updated_at)

View file

@ -3,6 +3,8 @@ import sqlite3
from pathlib import Path
import corrlib.initialization as cinit
import pytest
import pandas as pd
import datalad.api as dl
def make_sql(path: Path) -> Path:
@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
conn.close()
def test_find_project(tmp_path: Path) -> None:
cinit.create(tmp_path)
db = tmp_path / "backlogger.db"
dl.unlock(str(db), dataset=str(tmp_path))
conn = sqlite3.connect(db)
c = conn.cursor()
uuid = "test_uuid"
alias_str = "fun_project"
tag_str = "tt"
owner = "tester"
code = "test_code"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
assert uuid == find.find_project(tmp_path, "fun_project")
uuid = "test_uuid2"
alias_str = "fun_project"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
with pytest.raises(Exception):
assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project")
conn.close()
def test_find_lookup_by_id(tmp_path: Path) -> None:
db = make_sql(tmp_path)
conn = sqlite3.connect(db)
@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None:
assert len(results) == 1
conn.close()
def test_time_filter() -> None:
record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created
record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated
record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later
record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966']
record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier
record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid...
data = [record_A, record_B, record_C, record_D, record_E]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
data = [record_A, record_B, record_C, record_D, record_F]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
with pytest.raises(ValueError):
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')