import corrlib.find as find import sqlite3 from pathlib import Path import corrlib.initialization as cinit import pytest import pandas as pd import datalad.api as dl def make_sql(path: Path) -> Path: db = path / "test.db" cinit._create_db(db) return db def test_find_lookup_by_one_alias(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) c = conn.cursor() uuid = "test_uuid" alias_str = "fun_project" tag_str = "tt" owner = "tester" code = "test_code" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() assert uuid == find._project_lookup_by_alias(db, "fun_project") uuid = "test_uuid2" alias_str = "fun_project" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() with pytest.raises(Exception): assert uuid == find._project_lookup_by_alias(db, "fun_project") conn.close() def test_find_lookup_by_id(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) c = conn.cursor() uuid = "test_uuid" alias_str = "fun_project" tag_str = "tt" owner = "tester" code = "test_code" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() conn.close() result = find._project_lookup_by_id(db, uuid)[0] assert uuid == result[0] assert alias_str == result[1] assert tag_str == result[2] assert owner == result[3] assert code == result[4] def test_time_filter() -> None: record_A = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] # only created record_B = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-04-26 12:55:18.229966'] # created and updated record_C = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-05-26 12:55:18.229966'] # created and updated later record_D = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2026-03-27 12:55:18.229966'] record_E = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2024-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # only created, earlier record_F = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2026-03-26 12:55:18.229966', '2024-03-26 12:55:18.229966'] # this is invalid... data = [record_A, record_B, record_C, record_D, record_E] cols = ["name", "ensemble", "code", "path", "project", "parameters", "parameter_file", "created_at", "updated_at"] df = pd.DataFrame(data,columns=cols) results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') assert results.empty results = find._time_filter(df, created_before='2027-03-26 12:55:18.229966') assert len(results) == 5 results = find._time_filter(df, created_before='2026-03-25 12:55:18.229966') assert len(results) == 3 results = find._time_filter(df, created_before='2026-03-26 12:55:18.229965') assert len(results) == 3 results = find._time_filter(df, created_before='2025-03-04 12:55:18.229965') assert len(results) == 1 results = find._time_filter(df, created_after='2023-03-26 12:55:18.229966') assert len(results) == 5 results = find._time_filter(df, created_after='2027-03-26 12:55:18.229966') assert results.empty results = find._time_filter(df, created_after='2026-03-25 12:55:18.229966') assert len(results) == 2 results = find._time_filter(df, created_after='2026-03-26 12:55:18.229965') assert len(results) == 2 results = find._time_filter(df, created_after='2025-03-04 12:55:18.229965') assert len(results) == 4 results = find._time_filter(df, updated_before='2023-03-26 12:55:18.229966') assert results.empty results = find._time_filter(df, updated_before='2027-03-26 12:55:18.229966') assert len(results) == 5 results = find._time_filter(df, updated_before='2026-03-25 12:55:18.229966') assert len(results) == 3 results = find._time_filter(df, updated_before='2026-03-26 12:55:18.229965') assert len(results) == 3 results = find._time_filter(df, updated_before='2025-03-04 12:55:18.229965') assert len(results) == 1 results = find._time_filter(df, updated_after='2023-03-26 12:55:18.229966') assert len(results) == 5 results = find._time_filter(df, updated_after='2027-03-26 12:55:18.229966') assert results.empty results = find._time_filter(df, updated_after='2026-03-25 12:55:18.229966') assert len(results) == 2 results = find._time_filter(df, updated_after='2026-03-26 12:55:18.229965') assert len(results) == 2 results = find._time_filter(df, updated_after='2025-03-04 12:55:18.229965') assert len(results) == 4 data = [record_A, record_B, record_C, record_D, record_F] cols = ["name", "ensemble", "code", "path", "project", "parameters", "parameter_file", "created_at", "updated_at"] df = pd.DataFrame(data,columns=cols) with pytest.raises(ValueError): results = find._time_filter(df, created_before='2023-03-26 12:55:18.229966') def test_db_lookup(tmp_path: Path) -> None: db = make_sql(tmp_path) conn = sqlite3.connect(db) c = conn.cursor() corr = "f_A" ensemble = "SF_A" code = "openQCD" meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" uuid = "Project_A" pars = "{par_A: 3.0, par_B: 5.0}" parameter_file = "projects/Project_A/myinput.in" c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) conn.commit() results = find._db_lookup(db, ensemble, corr, code) assert len(results) == 1 results = find._db_lookup(db, "SF_B", corr, code) assert results.empty results = find._db_lookup(db, ensemble, "g_A", code) assert results.empty results = find._db_lookup(db, ensemble, corr, "sfcf") assert results.empty results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert results.empty corr = "g_A" ensemble = "SF_A" code = "openQCD" meas_path = "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf" uuid = "Project_A" pars = '{"par_A": 3.0, "par_B": 4.0}' parameter_file = "projects/Project_A/myinput.in" c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars, parameter_file)) conn.commit() corr = "f_A" results = find._db_lookup(db, ensemble, corr, code) assert len(results) == 1 results = find._db_lookup(db, "SF_B", corr, code) assert results.empty results = find._db_lookup(db, ensemble, "g_A", code) assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, "sfcf") assert results.empty results = find._db_lookup(db, ensemble, corr, code, project = "Project_A") assert len(results) == 1 results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_A") assert len(results) == 1 results = find._db_lookup(db, ensemble, corr, code, project = "Project_B") assert results.empty results = find._db_lookup(db, ensemble, "g_A", code, project = "Project_B") assert results.empty results = find._db_lookup(db, ensemble, corr, code, parameters = pars) assert results.empty results = find._db_lookup(db, ensemble, "g_A", code, parameters = '{"par_A": 3.0, "par_B": 4.0}') assert len(results) == 1 conn.close() def test_sfcf_drop() -> None: parameters0 = { 'offset': [0,0,0], 'quarks': [{'mass': 1, 'thetas': [0,0,0]}, {'mass': 2, 'thetas': [0,0,1]}], # m0s = -3.5, -3.75 'wf1': [[1, [0, 0]], [0.5, [1, 0]], [.75, [.5, .5]]], 'wf2': [[1, [2, 1]], [2, [0.5, -0.5]], [.5, [.75, .72]]], } assert not find._sfcf_drop(parameters0, offset=[0,0,0]) assert find._sfcf_drop(parameters0, offset=[1,0,0]) assert not find._sfcf_drop(parameters0, quark_kappas = [1, 2]) assert find._sfcf_drop(parameters0, quark_kappas = [-3.1, -3.72]) assert not find._sfcf_drop(parameters0, quark_masses = [-3.5, -3.75]) assert find._sfcf_drop(parameters0, quark_masses = [-3.1, -3.72]) assert not find._sfcf_drop(parameters0, qk1 = 1) assert not find._sfcf_drop(parameters0, qk2 = 2) assert find._sfcf_drop(parameters0, qk1 = 2) assert find._sfcf_drop(parameters0, qk2 = 1) assert not find._sfcf_drop(parameters0, qk1 = [0.5,1.5]) assert not find._sfcf_drop(parameters0, qk2 = [1.5,2.5]) assert find._sfcf_drop(parameters0, qk1 = 2) assert find._sfcf_drop(parameters0, qk2 = 1) with pytest.raises(ValueError): assert not find._sfcf_drop(parameters0, qk1 = [0.5,1,5]) with pytest.raises(ValueError): assert not find._sfcf_drop(parameters0, qk2 = [1,5,2.5]) assert find._sfcf_drop(parameters0, qm1 = 1.2) assert find._sfcf_drop(parameters0, qm2 = 2.2) assert not find._sfcf_drop(parameters0, qm1 = -3.5) assert not find._sfcf_drop(parameters0, qm2 = -3.75) assert find._sfcf_drop(parameters0, qm2 = 1.2) assert find._sfcf_drop(parameters0, qm1 = 2.2) with pytest.raises(ValueError): assert not find._sfcf_drop(parameters0, qm1 = [0.5,1,5]) with pytest.raises(ValueError): assert not find._sfcf_drop(parameters0, qm2 = [1,5,2.5]) def test_openQCD_filter() -> None: assert True def test_code_filter() -> None: record_0 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_1 = ["f_A", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_2 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_3 = ["f_P", "ensA", "sfcf", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_4 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_5 = ["f_A", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_6 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_7 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] record_8 = ["f_P", "ensA", "openQCD", "archive/SF_A/f_A/Project_A.json.gz::asdfasdfasdf", "SF_A", '{"par_A": 5.0, "par_B": 5.0}', "projects/SF_A/input.in", '2025-03-26 12:55:18.229966', '2025-03-26 12:55:18.229966'] data = [ record_0, record_1, record_2, record_3, ] cols = ["name", "ensemble", "code", "path", "project", "parameters", "parameter_file", "created_at", "updated_at"] df = pd.DataFrame(data,columns=cols) res = find._code_filter(df, "sfcf") assert len(res) == 4 data = [ record_4, record_5, record_6, record_7, record_8, ] cols = ["name", "ensemble", "code", "path", "project", "parameters", "parameter_file", "created_at", "updated_at"] df = pd.DataFrame(data,columns=cols) res = find._code_filter(df, "openQCD") assert len(res) == 5 with pytest.raises(ValueError): res = find._code_filter(df, "asdf") def test_find_record() -> None: assert True def test_find_project(tmp_path: Path) -> None: cinit.create(tmp_path) db = tmp_path / "backlogger.db" dl.unlock(str(db), dataset=str(tmp_path)) conn = sqlite3.connect(db) c = conn.cursor() uuid = "test_uuid" alias_str = "fun_project" tag_str = "tt" owner = "tester" code = "test_code" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() assert uuid == find.find_project(tmp_path, "fun_project") uuid = "test_uuid2" alias_str = "fun_project" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() with pytest.raises(Exception): assert uuid == find._project_lookup_by_alias(tmp_path, "fun_project") conn.close() def test_list_projects(tmp_path: Path) -> None: cinit.create(tmp_path) db = tmp_path / "backlogger.db" dl.unlock(str(db), dataset=str(tmp_path)) conn = sqlite3.connect(db) c = conn.cursor() uuid = "test_uuid" alias_str = "fun_project" tag_str = "tt" owner = "tester" code = "test_code" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) uuid = "test_uuid2" alias_str = "fun_project2" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) uuid = "test_uuid3" alias_str = "fun_project3" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) uuid = "test_uuid4" alias_str = "fun_project4" c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() conn.close() results = find.list_projects(tmp_path) assert len(results) == 4 for i in range(4): assert len(results[i]) == 2