Compare commits
5 commits
4516ca3149
...
cc14e68b44
| Author | SHA1 | Date | |
|---|---|---|---|
|
cc14e68b44 |
|||
|
a90b992326 |
|||
|
38b4983fed |
|||
|
b8121811f9 |
|||
|
2fd46d452b |
4 changed files with 128 additions and 3 deletions
|
|
@ -6,6 +6,7 @@ import numpy as np
|
|||
from .input.implementations import codes
|
||||
from .tools import k2m, get_db_file
|
||||
from .tracker import get
|
||||
from .integrity import check_time_validity
|
||||
from typing import Any, Optional, Union
|
||||
from pathlib import Path
|
||||
import datetime as dt
|
||||
|
|
@ -70,6 +71,9 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
|
|||
result = results.iloc[ind]
|
||||
created_at = dt.datetime.fromisoformat(result['created_at'])
|
||||
updated_at = dt.datetime.fromisoformat(result['updated_at'])
|
||||
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
|
||||
if not db_times_valid:
|
||||
raise ValueError('Time stamps not valid for result with path', result["path"])
|
||||
|
||||
if created_before is not None:
|
||||
date_created_before = dt.datetime.fromisoformat(created_before)
|
||||
|
|
|
|||
5
corrlib/integrity.py
Normal file
5
corrlib/integrity.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
import datetime as dt
|
||||
|
||||
|
||||
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
    """Return True when a record's timestamps are mutually consistent.

    A record is valid exactly when it was not updated before it was
    created, i.e. ``created_at`` is less than or equal to ``updated_at``.

    Parameters
    ----------
    created_at : dt.datetime
        Creation timestamp of the record.
    updated_at : dt.datetime
        Last-update timestamp of the record.

    Returns
    -------
    bool
        True if ``created_at <= updated_at``, False otherwise.
    """
    return created_at <= updated_at
|
||||
|
|
@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
|
|||
if cache_enabled(path):
|
||||
if not os.path.exists(cache_dir(path, file)):
|
||||
os.makedirs(cache_dir(path, file))
|
||||
dump_object(preloaded[file][key], cache_path(path, file, key))
|
||||
dump_object(preloaded[file][key], str(cache_path(path, file, key)))
|
||||
return returned_data
|
||||
|
||||
|
||||
|
|
@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path:
|
|||
The path holding the cached data for the given file.
|
||||
"""
|
||||
cache_path_list = file.split("/")[1:]
|
||||
cache_path = path / CACHE_DIR
|
||||
cache_path = Path(path) / CACHE_DIR
|
||||
for directory in cache_path_list:
|
||||
cache_path /= directory
|
||||
return cache_path
|
||||
|
|
@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]:
|
|||
The data read from the file.
|
||||
"""
|
||||
get(path, file)
|
||||
filedict: dict[str, Any] = pj.load_json_dict(path / file)
|
||||
filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
|
||||
print("> read file")
|
||||
return filedict
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ import sqlite3
|
|||
from pathlib import Path
|
||||
import corrlib.initialization as cinit
|
||||
import pytest
|
||||
import pandas as pd
|
||||
import datalad.api as dl
|
||||
|
||||
|
||||
def make_sql(path: Path) -> Path:
|
||||
|
|
@ -34,6 +36,34 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
|
|||
conn.close()
|
||||
|
||||
|
||||
def test_find_project(tmp_path: Path) -> None:
    """A unique alias resolves to its project id; a duplicated alias raises.

    Seeds the test dataset's SQLite database with one project row, checks
    that ``find.find_project`` returns its id, then inserts a second
    project re-using the same alias and expects the alias lookup to fail.
    """
    cinit.create(tmp_path)
    db_file = tmp_path / "backlogger.db"
    # The database lives in a datalad dataset; unlock before writing.
    dl.unlock(str(db_file), dataset=str(tmp_path))
    connection = sqlite3.connect(db_file)
    cursor = connection.cursor()
    insert_stmt = "INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))"
    first_row = ("test_uuid", "fun_project", "tt", "tester", "test_code")
    cursor.execute(insert_stmt, first_row)
    connection.commit()

    assert first_row[0] == find.find_project(tmp_path, "fun_project")

    # A second project with the same alias makes the lookup ambiguous.
    second_row = ("test_uuid2", "fun_project", "tt", "tester", "test_code")
    cursor.execute(insert_stmt, second_row)
    connection.commit()

    with pytest.raises(Exception):
        assert second_row[0] == find._project_lookup_by_alias(tmp_path, "fun_project")
    connection.close()
|
||||
|
||||
|
||||
def test_find_lookup_by_id(tmp_path: Path) -> None:
|
||||
db = make_sql(tmp_path)
|
||||
conn = sqlite3.connect(db)
|
||||
|
|
@ -122,3 +152,89 @@ def test_db_lookup(tmp_path: Path) -> None:
|
|||
assert len(results) == 1
|
||||
|
||||
conn.close()
|
||||
|
||||
|
||||
def test_time_filter() -> None:
    """Exercise ``find._time_filter`` over all four time bounds.

    Builds a frame of five records with staggered created_at/updated_at
    timestamps, checks the row counts returned for a grid of
    created/updated before/after cutoffs, and finally verifies that a
    record whose updated_at precedes its created_at raises ValueError.
    """
    prefix = ["f_A", "ensA", "sfcf",
              "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A",
              '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in"]

    def rec(created: str, updated: str) -> list:
        # Full record row: shared prefix plus the two timestamps.
        return prefix + [created, updated]

    record_A = rec('2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966')  # only created
    record_B = rec('2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966')  # created and updated
    record_C = rec('2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966')  # created and updated later
    record_D = rec('2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966')
    record_E = rec('2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # only created, earlier
    record_F = rec('2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # this is invalid...

    cols = ["name",
            "ensemble",
            "code",
            "path",
            "project",
            "parameters",
            "parameter_file",
            "created_at",
            "updated_at"]
    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_E],
                      columns=cols)

    # Each case: (filter keyword, cutoff timestamp, expected row count).
    cases = [
        ('created_before', '2023-03-26 12:55:18.229966', 0),
        ('created_before', '2027-03-26 12:55:18.229966', 5),
        ('created_before', '2026-03-25 12:55:18.229966', 3),
        ('created_before', '2026-03-26 12:55:18.229965', 3),
        ('created_before', '2025-03-04 12:55:18.229965', 1),
        ('created_after', '2023-03-26 12:55:18.229966', 5),
        ('created_after', '2027-03-26 12:55:18.229966', 0),
        ('created_after', '2026-03-25 12:55:18.229966', 2),
        ('created_after', '2026-03-26 12:55:18.229965', 2),
        ('created_after', '2025-03-04 12:55:18.229965', 4),
        ('updated_before', '2023-03-26 12:55:18.229966', 0),
        ('updated_before', '2027-03-26 12:55:18.229966', 5),
        ('updated_before', '2026-03-25 12:55:18.229966', 3),
        ('updated_before', '2026-03-26 12:55:18.229965', 3),
        ('updated_before', '2025-03-04 12:55:18.229965', 1),
        ('updated_after', '2023-03-26 12:55:18.229966', 5),
        ('updated_after', '2027-03-26 12:55:18.229966', 0),
        ('updated_after', '2026-03-25 12:55:18.229966', 2),
        ('updated_after', '2026-03-26 12:55:18.229965', 2),
        ('updated_after', '2025-03-04 12:55:18.229965', 4),
    ]
    for keyword, cutoff, expected in cases:
        results = find._time_filter(df, **{keyword: cutoff})
        assert len(results) == expected

    # Swap the valid early record for the inconsistent one: must raise.
    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_F],
                      columns=cols)
    with pytest.raises(ValueError):
        find._time_filter(df, created_before='2023-03-26 12:55:18.229966')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue