Compare commits

..

No commits in common. "cc14e68b4429a122ee0c9b299555f4e7ca8fef45" and "4516ca3149cac8b2f0420903c41576b471b7ed8f" have entirely different histories.

4 changed files with 3 additions and 128 deletions

View file

@ -6,7 +6,6 @@ import numpy as np
from .input.implementations import codes from .input.implementations import codes
from .tools import k2m, get_db_file from .tools import k2m, get_db_file
from .tracker import get from .tracker import get
from .integrity import check_time_validity
from typing import Any, Optional, Union from typing import Any, Optional, Union
from pathlib import Path from pathlib import Path
import datetime as dt import datetime as dt
@ -71,9 +70,6 @@ def _time_filter(results: pd.DataFrame, created_before: Optional[str]=None, cre
result = results.iloc[ind] result = results.iloc[ind]
created_at = dt.datetime.fromisoformat(result['created_at']) created_at = dt.datetime.fromisoformat(result['created_at'])
updated_at = dt.datetime.fromisoformat(result['updated_at']) updated_at = dt.datetime.fromisoformat(result['updated_at'])
db_times_valid = check_time_validity(created_at=created_at, updated_at=updated_at)
if not db_times_valid:
raise ValueError('Time stamps not valid for result with path', result["path"])
if created_before is not None: if created_before is not None:
date_created_before = dt.datetime.fromisoformat(created_before) date_created_before = dt.datetime.fromisoformat(created_before)

View file

@ -1,5 +0,0 @@
import datetime as dt
def check_time_validity(created_at: dt.datetime, updated_at: dt.datetime) -> bool:
    """Tell whether a pair of record time stamps is consistent.

    Parameters
    ----------
    created_at: dt.datetime
        Time stamp at which the record was created.
    updated_at: dt.datetime
        Time stamp at which the record was last updated.

    Returns
    -------
    bool
        ``True`` when ``created_at`` is not later than ``updated_at``
        (equal time stamps count as valid), ``False`` otherwise.
    """
    return created_at <= updated_at

View file

@ -175,7 +175,7 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
if cache_enabled(path): if cache_enabled(path):
if not os.path.exists(cache_dir(path, file)): if not os.path.exists(cache_dir(path, file)):
os.makedirs(cache_dir(path, file)) os.makedirs(cache_dir(path, file))
dump_object(preloaded[file][key], str(cache_path(path, file, key))) dump_object(preloaded[file][key], cache_path(path, file, key))
return returned_data return returned_data
@ -195,7 +195,7 @@ def cache_dir(path: Path, file: str) -> Path:
The path holding the cached data for the given file. The path holding the cached data for the given file.
""" """
cache_path_list = file.split("/")[1:] cache_path_list = file.split("/")[1:]
cache_path = Path(path) / CACHE_DIR cache_path = path / CACHE_DIR
for directory in cache_path_list: for directory in cache_path_list:
cache_path /= directory cache_path /= directory
return cache_path return cache_path
@ -238,7 +238,7 @@ def preload(path: Path, file: Path) -> dict[str, Any]:
The data read from the file. The data read from the file.
""" """
get(path, file) get(path, file)
filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) filedict: dict[str, Any] = pj.load_json_dict(path / file)
print("> read file") print("> read file")
return filedict return filedict

View file

@ -3,8 +3,6 @@ import sqlite3
from pathlib import Path from pathlib import Path
import corrlib.initialization as cinit import corrlib.initialization as cinit
import pytest import pytest
import pandas as pd
import datalad.api as dl
def make_sql(path: Path) -> Path: def make_sql(path: Path) -> Path:
@ -36,34 +34,6 @@ def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
conn.close() conn.close()
def test_find_project(tmp_path: Path) -> None:
    """Check alias-based project lookup, including the duplicated-alias case.

    A fresh library is created in ``tmp_path`` and one project with the alias
    ``fun_project`` is inserted; ``find.find_project`` must return its id.
    A second project with the same alias is then inserted, after which
    ``find._project_lookup_by_alias`` is expected to raise.
    """
    cinit.create(tmp_path)
    db = tmp_path / "backlogger.db"
    # NOTE(review): presumably the database file is locked dataset content
    # after ``cinit.create`` and has to be unlocked before writing -- confirm.
    dl.unlock(str(db), dataset=str(tmp_path))

    insert_project = "INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))"
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()

        # A single project carrying the alias: the lookup must resolve to its id.
        uuid = "test_uuid"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()
        assert uuid == find.find_project(tmp_path, "fun_project")

        # Second project with the very same alias: the lookup is now ambiguous.
        uuid = "test_uuid2"
        c.execute(insert_project, (uuid, alias_str, tag_str, owner, code))
        conn.commit()
        # Call the lookup bare: wrapping it in an ``assert`` would let the
        # resulting AssertionError satisfy ``pytest.raises(Exception)`` even if
        # the lookup itself never raised.
        with pytest.raises(Exception):
            find._project_lookup_by_alias(tmp_path, "fun_project")
    finally:
        # Release the database handle even when one of the checks above fails.
        conn.close()
def test_find_lookup_by_id(tmp_path: Path) -> None: def test_find_lookup_by_id(tmp_path: Path) -> None:
db = make_sql(tmp_path) db = make_sql(tmp_path)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
@ -152,89 +122,3 @@ def test_db_lookup(tmp_path: Path) -> None:
assert len(results) == 1 assert len(results) == 1
conn.close() conn.close()
def test_time_filter() -> None:
    """Exercise ``find._time_filter`` for all four time-bound keywords.

    Five records with consistent time stamps are filtered with each of
    ``created_before``, ``created_after``, ``updated_before`` and
    ``updated_after`` at five reference times, and the number of surviving
    rows is compared with the expected count.  Finally a record whose
    ``created_at`` is later than its ``updated_at`` is added and the filter is
    expected to raise ``ValueError``.
    """
    columns = ["name",
               "ensemble",
               "code",
               "path",
               "project",
               "parameters",
               "parameter_file",
               "created_at",
               "updated_at"]
    # All records share everything except their two time stamps.
    common = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A",
              '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in"]

    def make_record(created_at: str, updated_at: str) -> list[str]:
        return [*common, created_at, updated_at]

    record_A = make_record('2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966')  # only created
    record_B = make_record('2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966')  # created and updated
    record_C = make_record('2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966')  # created and updated later
    record_D = make_record('2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966')
    record_E = make_record('2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # only created, earlier
    record_F = make_record('2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # this is invalid...

    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_E], columns=columns)

    # Reference times used for every keyword, in the order they are checked.
    reference_times = ['2023-03-26 12:55:18.229966',
                       '2027-03-26 12:55:18.229966',
                       '2026-03-25 12:55:18.229966',
                       '2026-03-26 12:55:18.229965',
                       '2025-03-04 12:55:18.229965']
    # Expected number of surviving rows per keyword, aligned with reference_times.
    expected_counts = {
        "created_before": (0, 5, 3, 3, 1),
        "created_after": (5, 0, 2, 2, 4),
        "updated_before": (0, 5, 3, 3, 1),
        "updated_after": (5, 0, 2, 2, 4),
    }
    for keyword, counts in expected_counts.items():
        for reference, count in zip(reference_times, counts):
            results = find._time_filter(df, **{keyword: reference})
            if count == 0:
                assert results.empty
            else:
                assert len(results) == count

    # Swapping in the inconsistent record must make the filter refuse the data.
    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_F], columns=columns)
    with pytest.raises(ValueError):
        find._time_filter(df, created_before='2023-03-26 12:55:18.229966')