"""Tests for corrlib.find: project lookups and backlog database queries."""

import sqlite3
from pathlib import Path

import pandas as pd
import pytest
import datalad.api as dl

import corrlib.find as find
import corrlib.initialization as cinit

# Shared INSERT statements so every test writes rows with an identical schema.
_PROJECT_INSERT = (
    "INSERT INTO projects "
    "(id, aliases, customTags, owner, code, created_at, updated_at) "
    "VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))"
)
_BACKLOG_INSERT = (
    "INSERT INTO backlogs "
    "(name, ensemble, code, path, project, parameters, parameter_file, "
    "created_at, updated_at) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))"
)


def make_sql(path: Path) -> Path:
    """Create an empty corrlib database inside *path* and return its location."""
    db = path / "test.db"
    cinit._create_db(db)
    return db


def test_find_lookup_by_one_alias(tmp_path: Path) -> None:
    """A unique alias resolves to its project id; a duplicated alias raises."""
    db = make_sql(tmp_path)
    conn = sqlite3.connect(db)
    c = conn.cursor()
    uuid = "test_uuid"
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"
    c.execute(_PROJECT_INSERT, (uuid, alias_str, tag_str, owner, code))
    conn.commit()
    assert uuid == find._project_lookup_by_alias(db, "fun_project")
    # A second project sharing the alias makes the lookup ambiguous.
    uuid = "test_uuid2"
    c.execute(_PROJECT_INSERT, (uuid, alias_str, tag_str, owner, code))
    conn.commit()
    with pytest.raises(Exception):
        find._project_lookup_by_alias(db, "fun_project")
    conn.close()


def test_find_project(tmp_path: Path) -> None:
    """find_project resolves an alias in a full dataset; duplicates raise."""
    cinit.create(tmp_path)
    db = tmp_path / "backlogger.db"
    # The dataset keeps the db file locked/read-only; unlock before writing.
    dl.unlock(str(db), dataset=str(tmp_path))
    conn = sqlite3.connect(db)
    c = conn.cursor()
    uuid = "test_uuid"
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"
    c.execute(_PROJECT_INSERT, (uuid, alias_str, tag_str, owner, code))
    conn.commit()
    assert uuid == find.find_project(tmp_path, "fun_project")
    # A second project with the same alias makes the lookup ambiguous.
    uuid = "test_uuid2"
    c.execute(_PROJECT_INSERT, (uuid, alias_str, tag_str, owner, code))
    conn.commit()
    with pytest.raises(Exception):
        # Fixed: exercise the entry point under test. The original called
        # find._project_lookup_by_alias(tmp_path, ...), passing the dataset
        # *directory* instead of the db path, so the raise came from sqlite3
        # failing to open a directory — not from the duplicate alias.
        find.find_project(tmp_path, "fun_project")
    conn.close()


def test_find_lookup_by_id(tmp_path: Path) -> None:
    """_project_lookup_by_id returns the full stored project row."""
    db = make_sql(tmp_path)
    conn = sqlite3.connect(db)
    c = conn.cursor()
    uuid = "test_uuid"
    alias_str = "fun_project"
    tag_str = "tt"
    owner = "tester"
    code = "test_code"
    c.execute(_PROJECT_INSERT, (uuid, alias_str, tag_str, owner, code))
    conn.commit()
    conn.close()
    result = find._project_lookup_by_id(db, uuid)[0]
    assert uuid == result[0]
    assert alias_str == result[1]
    assert tag_str == result[2]
    assert owner == result[3]
    assert code == result[4]


def test_db_lookup(tmp_path: Path) -> None:
    """_db_lookup filters backlogs by ensemble/name/code plus optional project and parameters."""
    db = make_sql(tmp_path)
    conn = sqlite3.connect(db)
    c = conn.cursor()
    corr = "f_A"
    ensemble = "SF_A"
    code = "openQCD"
    meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf"
    uuid = "Project_A"
    pars = "{par_A: 3.0, par_B: 5.0}"
    parameter_file = "projects/Project_A/myinput.in"
    c.execute(_BACKLOG_INSERT,
              (corr, ensemble, code, meas_path, uuid, pars, parameter_file))
    conn.commit()

    # Exact match on the mandatory columns finds the single row; any
    # mismatched mandatory column yields an empty frame.
    assert len(find._db_lookup(db, ensemble, corr, code)) == 1
    assert find._db_lookup(db, "SF_B", corr, code).empty
    assert find._db_lookup(db, ensemble, "g_A", code).empty
    assert find._db_lookup(db, ensemble, corr, "sfcf").empty
    # Optional project filter.
    assert len(find._db_lookup(db, ensemble, corr, code, project="Project_A")) == 1
    assert find._db_lookup(db, ensemble, corr, code, project="Project_B").empty
    # Optional parameters filter.
    assert len(find._db_lookup(db, ensemble, corr, code, parameters=pars)) == 1
    assert find._db_lookup(db, ensemble, corr, code,
                           parameters="{par_A: 3.0, par_B: 4.0}").empty

    # Add a second correlator (g_A) with different parameters and re-check
    # that the filters keep the two rows apart.
    c.execute(_BACKLOG_INSERT,
              ("g_A", ensemble, code, meas_path, uuid,
               "{par_A: 3.0, par_B: 4.0}", parameter_file))
    conn.commit()

    assert len(find._db_lookup(db, ensemble, corr, code)) == 1
    assert find._db_lookup(db, "SF_B", corr, code).empty
    assert len(find._db_lookup(db, ensemble, "g_A", code)) == 1
    assert find._db_lookup(db, ensemble, corr, "sfcf").empty
    assert len(find._db_lookup(db, ensemble, corr, code, project="Project_A")) == 1
    assert len(find._db_lookup(db, ensemble, "g_A", code, project="Project_A")) == 1
    assert find._db_lookup(db, ensemble, corr, code, project="Project_B").empty
    assert find._db_lookup(db, ensemble, "g_A", code, project="Project_B").empty
    # f_A does not carry g_A's parameter set, and vice versa.
    assert find._db_lookup(db, ensemble, corr, code,
                           parameters="{par_A: 3.0, par_B: 4.0}").empty
    assert len(find._db_lookup(db, ensemble, "g_A", code,
                               parameters="{par_A: 3.0, par_B: 4.0}")) == 1
    conn.close()


def _record(created_at: str, updated_at: str) -> list:
    """Build a backlog row for _time_filter tests, varying only the timestamps."""
    return ["f_A", "ensA", "sfcf",
            "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A",
            '{par_A: 5.0, par_B: 5.0}', "projects/SF_A/input.in",
            created_at, updated_at]


def test_time_filter() -> None:
    """_time_filter selects rows by created/updated bounds and rejects rows updated before creation."""
    cols = ["name", "ensemble", "code", "path", "project", "parameters",
            "parameter_file", "created_at", "updated_at"]
    record_A = _record('2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966')  # only created
    record_B = _record('2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966')  # created and updated
    record_C = _record('2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966')  # created and updated later
    record_D = _record('2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966')
    record_E = _record('2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # only created, earlier
    record_F = _record('2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966')  # invalid: updated < created
    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_E],
                      columns=cols)

    # "before" filters behave identically for created_at and updated_at with
    # these records; same for the "after" filters below.
    before_cases = [
        ('2023-03-26 12:55:18.229966', 0),
        ('2027-03-26 12:55:18.229966', 5),
        ('2026-03-25 12:55:18.229966', 3),
        ('2026-03-26 12:55:18.229965', 3),
        ('2025-03-04 12:55:18.229965', 1),
    ]
    for stamp, expected in before_cases:
        assert len(find._time_filter(df, created_before=stamp)) == expected
        assert len(find._time_filter(df, updated_before=stamp)) == expected

    after_cases = [
        ('2023-03-26 12:55:18.229966', 5),
        ('2027-03-26 12:55:18.229966', 0),
        ('2026-03-25 12:55:18.229966', 2),
        ('2026-03-26 12:55:18.229965', 2),
        ('2025-03-04 12:55:18.229965', 4),
    ]
    for stamp, expected in after_cases:
        assert len(find._time_filter(df, created_after=stamp)) == expected
        assert len(find._time_filter(df, updated_after=stamp)) == expected

    # A row updated before it was created is inconsistent and must raise.
    df = pd.DataFrame([record_A, record_B, record_C, record_D, record_F],
                      columns=cols)
    with pytest.raises(ValueError):
        find._time_filter(df, created_before='2023-03-26 12:55:18.229966')