corrlib/tests/find_test.py
Justus Kuhlmann cc14e68b44
Some checks failed
Mypy / mypy (push) Successful in 1m15s
Pytest / pytest (3.12) (push) Successful in 1m20s
Pytest / pytest (3.13) (push) Successful in 1m16s
Pytest / pytest (3.14) (push) Successful in 1m25s
Ruff / ruff (push) Has been cancelled
add tests for time filter and find project, add a first check for integrity of the database
2026-03-26 17:19:58 +01:00

240 lines
10 KiB
Python

import corrlib.find as find
import sqlite3
from pathlib import Path
import corrlib.initialization as cinit
import pytest
import pandas as pd
import datalad.api as dl
def make_sql(path: Path) -> Path:
db = path / "test.db"
cinit._create_db(db)
return db
def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
db = make_sql(tmp_path)
conn = sqlite3.connect(db)
c = conn.cursor()
uuid = "test_uuid"
alias_str = "fun_project"
tag_str = "tt"
owner = "tester"
code = "test_code"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
assert uuid == find._project_lookup_by_alias(db, "fun_project")
uuid = "test_uuid2"
alias_str = "fun_project"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
with pytest.raises(Exception):
assert uuid == find._project_lookup_by_alias(db, "fun_project")
conn.close()
def test_find_project(tmp_path: Path) -> None:
cinit.create(tmp_path)
db = tmp_path / "backlogger.db"
dl.unlock(str(db), dataset=str(tmp_path))
conn = sqlite3.connect(db)
c = conn.cursor()
uuid = "test_uuid"
alias_str = "fun_project"
tag_str = "tt"
owner = "tester"
code = "test_code"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
assert uuid == find.find_project(tmp_path, "fun_project")
uuid = "test_uuid2"
alias_str = "fun_project"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
with pytest.raises(Exception):
assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project")
conn.close()
def test_find_lookup_by_id(tmp_path: Path) -> None:
db = make_sql(tmp_path)
conn = sqlite3.connect(db)
c = conn.cursor()
uuid = "test_uuid"
alias_str = "fun_project"
tag_str = "tt"
owner = "tester"
code = "test_code"
c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(uuid, alias_str, tag_str, owner, code))
conn.commit()
conn.close()
result = find._project_lookup_by_id(db, uuid)[0]
assert uuid == result[0]
assert alias_str == result[1]
assert tag_str == result[2]
assert owner == result[3]
assert code == result[4]
def test_db_lookup(tmp_path: Path) -> None:
db = make_sql(tmp_path)
conn = sqlite3.connect(db)
c = conn.cursor()
corr = "f_A"
ensemble = "SF_A"
code = "openQCD"
meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf"
uuid = "Project_A"
pars = "{par_A: 3.0, par_B: 5.0}"
parameter_file = "projects/Project_A/myinput.in"
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(corr, ensemble, code, meas_path, uuid, pars, parameter_file))
conn.commit()
results = find._db_lookup(db, ensemble, corr, code)
assert len(results) == 1
results = find._db_lookup(db, "SF_B", corr, code)
assert results.empty
results = find._db_lookup(db, ensemble, "g_A", code)
assert results.empty
results = find._db_lookup(db, ensemble, corr, "sfcf")
assert results.empty
results = find._db_lookup(db, ensemble, corr, code, project = "Project_A")
assert len(results) == 1
results = find._db_lookup(db, ensemble, corr, code, project = "Project_B")
assert results.empty
results = find._db_lookup(db, ensemble, corr, code, parameters = pars)
assert len(results) == 1
results = find._db_lookup(db, ensemble, corr, code, parameters = "{par_A: 3.0, par_B: 4.0}")
assert results.empty
corr = "g_A"
ensemble = "SF_A"
code = "openQCD"
meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf"
uuid = "Project_A"
pars = "{par_A: 3.0, par_B: 4.0}"
parameter_file = "projects/Project_A/myinput.in"
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(corr, ensemble, code, meas_path, uuid, pars, parameter_file))
conn.commit()
corr = "f_A"
results = find._db_lookup(db, ensemble, corr, code)
assert len(results) == 1
results = find._db_lookup(db, "SF_B", corr, code)
assert results.empty
results = find._db_lookup(db, ensemble, "g_A", code)
assert len(results) == 1
results = find._db_lookup(db, ensemble, corr, "sfcf")
assert results.empty
results = find._db_lookup(db, ensemble, corr, code, project = "Project_A")
assert len(results) == 1
results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A")
assert len(results) == 1
results = find._db_lookup(db, ensemble, corr, code, project = "Project_B")
assert results.empty
results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B")
assert results.empty
results = find._db_lookup(db, ensemble, corr, code, parameters = pars)
assert results.empty
results = find._db_lookup(db, ensemble, "g_A", code, parameters = "{par_A: 3.0, par_B: 4.0}")
assert len(results) == 1
conn.close()
def test_time_filter() -> None:
record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created
record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated
record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later
record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966']
record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier
record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
'2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid...
data = [record_A, record_B, record_C, record_D, record_E]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966')
assert len(results) == 3
results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965')
assert len(results) == 3
results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965')
assert len(results) == 1
results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966')
assert len(results) == 5
results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966')
assert results.empty
results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966')
assert len(results) == 2
results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965')
assert len(results) == 2
results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965')
assert len(results) == 4
data = [record_A, record_B, record_C, record_D, record_F]
cols = ["name",
"ensemble",
"code",
"path",
"project",
"parameters",
"parameter_file",
"created_at",
"updated_at"]
df = pd.DataFrame(data,columns=cols)
with pytest.raises(ValueError):
results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966')