Compare commits
5 commits
4516ca3149
...
cc14e68b44
| Author | SHA1 | Date | |
|---|---|---|---|
|
cc14e68b44 |
|||
|
a90b992326 |
|||
|
38b4983fed |
|||
|
b8121811f9 |
|||
|
2fd46d452b |
4 changed files with 128 additions and 3 deletions
|
|
@ -6,6 +6,7 @@ import numpy as np
|
||||||
from .input.implementations import codes
|
from .input.implementations import codes
|
||||||
from .tools import k2m, get_db_file
|
from .tools import k2m, get_db_file
|
||||||
from .tracker import get
|
from .tracker import get
|
||||||
|
from .integrity import check_time_validity
|
||||||
from typing import Any, Optional, Union
|
from typing import Any, Optional, Union
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
|
@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
|
||||||
result = results.iloc[ind]
|
result = results.iloc[ind]
|
||||||
created_at = dt.datetime.fromisoformat(result['created_at'])
|
created_at = dt.datetime.fromisoformat(result['created_at'])
|
||||||
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
||||||
|
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
|
||||||
|
if not db_times_valid:
|
||||||
|
raise ValueError('Time stamps not valid for result with path', result["path"])
|
||||||
|
|
||||||
if created_before is not None:
|
if created_before is not None:
|
||||||
date_created_before = dt.datetime.fromisoformat(created_before)
|
date_created_before = dt.datetime.fromisoformat(created_before)
|
||||||
|
|
|
||||||
5
corrlib/integrity.py
Normal file
5
corrlib/integrity.py
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
import datetime as dt
|
||||||
|
|
||||||
|
|
||||||
|
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
    """
    Check that a pair of time stamps is consistent.

    Parameters
    ----------
    created_at: dt.datetime
        The creation time stamp.
    updated_at: dt.datetime
        The time stamp of the last update.

    Returns
    -------
    valid: bool
        False if ``created_at`` is later than ``updated_at``, True otherwise
        (equal time stamps count as valid).
    """
    # An entry cannot have been updated before it was created.
    if created_at > updated_at:
        return False
    return True
|
||||||
|
|
@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
|
||||||
if cache_enabled(path):
|
if cache_enabled(path):
|
||||||
if not os.path.exists(cache_dir(path, file)):
|
if not os.path.exists(cache_dir(path, file)):
|
||||||
os.makedirs(cache_dir(path, file))
|
os.makedirs(cache_dir(path, file))
|
||||||
dump_object(preloaded[file][key], cache_path(path, file, key))
|
dump_object(preloaded[file][key], str(cache_path(path, file, key)))
|
||||||
return returned_data
|
return returned_data
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path:
|
||||||
The path holding the cached data for the given file.
|
The path holding the cached data for the given file.
|
||||||
"""
|
"""
|
||||||
cache_path_list = file.split("/")[1:]
|
cache_path_list = file.split("/")[1:]
|
||||||
cache_path = path / CACHE_DIR
|
cache_path = Path(path) / CACHE_DIR
|
||||||
for directory in cache_path_list:
|
for directory in cache_path_list:
|
||||||
cache_path /= directory
|
cache_path /= directory
|
||||||
return cache_path
|
return cache_path
|
||||||
|
|
@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]:
|
||||||
The data read from the file.
|
The data read from the file.
|
||||||
"""
|
"""
|
||||||
get(path, file)
|
get(path, file)
|
||||||
filedict: dict[str, Any] = pj.load_json_dict(path / file)
|
filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
|
||||||
print("> read file")
|
print("> read file")
|
||||||
return filedict
|
return filedict
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ import sqlite3
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import corrlib.initialization as cinit
|
import corrlib.initialization as cinit
|
||||||
import pytest
|
import pytest
|
||||||
|
import pandas as pd
|
||||||
|
import datalad.api as dl
|
||||||
|
|
||||||
|
|
||||||
def make_sql(path: Path) -> Path:
|
def make_sql(path: Path) -> Path:
|
||||||
|
|
@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_project(tmp_path: Path) -> None:
    """
    Test the alias-based project lookup for a unique and for an ambiguous alias.

    A first project with the alias ``fun_project`` is inserted and must be found
    by ``find.find_project``. A second project with the same alias is inserted
    afterwards; ``find._project_lookup_by_alias`` is then expected to raise.

    Parameters
    ----------
    tmp_path: Path
        Temporary directory (pytest fixture) in which the library is created.
    """
    cinit.create(tmp_path)
    db = tmp_path / "backlogger.db"
    # The database file is unlocked via datalad before rows are inserted into it.
    dl.unlock(str(db), dataset=str(tmp_path))

    insert_project = ("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) "
                      "VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))")
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"

    conn = sqlite3.connect(db)
    # try/finally so that the connection is released even if an assertion fails.
    try:
        c = conn.cursor()

        uuid = "test_uuid"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()

        assert uuid == find.find_project(tmp_path, "fun_project")

        # A second project sharing the alias makes the alias ambiguous.
        uuid = "test_uuid2"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()

        # No assert inside the context manager: a failing assert raises
        # AssertionError, which would satisfy pytest.raises(Exception) even
        # if the lookup itself did not raise at all.
        with pytest.raises(Exception):
            find._project_lookup_by_alias(tmp_path, "fun_project")
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
def test_find_lookup_by_id(tmp_path: Path) -> None:
|
def test_find_lookup_by_id(tmp_path: Path) -> None:
|
||||||
db = make_sql(tmp_path)
|
db = make_sql(tmp_path)
|
||||||
conn = sqlite3.connect(db)
|
conn = sqlite3.connect(db)
|
||||||
|
|
@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None:
|
||||||
assert len(results) == 1
|
assert len(results) == 1
|
||||||
|
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def test_time_filter() -> None:
    """
    Test ``find._time_filter`` against a small, hand-written result table.

    Five records with consistent time stamps are filtered with each of the
    keywords ``created_before``, ``created_after``, ``updated_before`` and
    ``updated_after``, and the number of surviving rows is compared to the
    expected count. Afterwards one record whose ``created_at`` lies after its
    ``updated_at`` is put into the table and a ``ValueError`` is expected.
    """
    columns = ["name",
               "ensemble",
               "code",
               "path",
               "project",
               "parameters",
               "parameter_file",
               "created_at",
               "updated_at"]
    # All records share the first seven fields; only the time stamps differ.
    shared_fields = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A",
                     '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in"]

    def make_record(created_at: str, updated_at: str) -> list:
        return [*shared_fields, created_at, updated_at]

    record_A = make_record('2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966')  # only created
    record_B = make_record('2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966')  # created and updated
    record_C = make_record('2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966')  # created and updated later
    record_D = make_record('2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966')
    record_E = make_record('2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # only created, earlier
    record_F = make_record('2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # this is invalid...

    valid_df = pd.DataFrame([record_A, record_B, record_C, record_D, record_E], columns=columns)

    # (threshold, expected number of rows); 0 means the result has to be empty.
    # The expected counts are the same for the created_* and updated_* keywords.
    expected_before = [('2023-03-26 12:55:18.229966', 0),
                       ('2027-03-26 12:55:18.229966', 5),
                       ('2026-03-25 12:55:18.229966', 3),
                       ('2026-03-26 12:55:18.229965', 3),
                       ('2025-03-04 12:55:18.229965', 1)]
    expected_after = [('2023-03-26 12:55:18.229966', 5),
                      ('2027-03-26 12:55:18.229966', 0),
                      ('2026-03-25 12:55:18.229966', 2),
                      ('2026-03-26 12:55:18.229965', 2),
                      ('2025-03-04 12:55:18.229965', 4)]

    for stamp in ("created", "updated"):
        for direction, expectations in (("before", expected_before), ("after", expected_after)):
            for threshold, expected_count in expectations:
                results = find._time_filter(valid_df, **{f"{stamp}_{direction}": threshold})
                if expected_count == 0:
                    assert results.empty
                else:
                    assert len(results) == expected_count

    # Swap the last record for the inconsistent one: filtering has to fail.
    invalid_df = pd.DataFrame([record_A, record_B, record_C, record_D, record_F], columns=columns)

    with pytest.raises(ValueError):
        find._time_filter(invalid_df, created_before='2023-03-26 12:55:18.229966')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue