diff --git a/corrlib/integrity.py b/corrlib/integrity.py
index dc1216c..63572a9 100644
--- a/corrlib/integrity.py
+++ b/corrlib/integrity.py
@@ -3,6 +3,8 @@ from pathlib import Path
 from .tools import get_db_file
 import pandas as pd
 import sqlite3
+from .tracker import get
+import pyerrors.input.json as pj
 
 
 def has_valid_times(result: pd.Series) -> bool:
@@ -38,10 +40,75 @@ def check_db_integrity(path: Path) -> None:
         if not has_valid_times(result):
             raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
     print("DB:\t✅")
+    return
+
+
+def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
+    """Check that database entries and the keys in the data files agree.
+
+    Each entry of ``meas_paths`` is split at ``::`` into a file name and a
+    key. Every file is passed to ``get``, loaded from ``path / file`` and
+    its keys are compared with the keys the entries reference for it.
+
+    Parameters
+    ----------
+    path: Path
+        Directory the file names in ``meas_paths`` are relative to.
+    meas_paths: list[str]
+        Entries of the form ``<file>::<key>``.
+
+    Raises
+    ------
+    ValueError
+        If an entry lacks the ``::`` separator, a file holds a key that no
+        entry references, or a referenced key is missing from its file.
+    """
+    needed_data: dict[str, list[str]] = {}
+    for mpath in meas_paths:
+        parts = mpath.split("::")
+        if len(parts) < 2:
+            # A malformed entry would otherwise surface as a bare IndexError.
+            raise ValueError(f"Path {mpath} is not of the form <file>::<key>.")
+        needed_data.setdefault(parts[0], []).append(parts[1])
+
+    for file, needed_keys in needed_data.items():
+        get(path, Path(file))
+        filedict = pj.load_json_dict(str(path / file))
+        for key in filedict:
+            if key not in needed_keys:
+                raise ValueError(f"Found unintended key {key} in file {file}.")
+        for key in needed_keys:
+            if key not in filedict:
+                raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
+    print("Links:\t✅")
+    return
+
+
+def check_db_file_links(path: Path) -> None:
+    """Check the paths recorded in the database against the data files.
+
+    Reads the ``path`` column of the ``backlogs`` table and hands the
+    entries to ``_check_db2paths``.
+
+    Parameters
+    ----------
+    path: Path
+        Directory that holds the database file and the data files.
+    """
+    db = get_db_file(path)
+    search_expr = "SELECT path FROM 'backlogs'"
+    conn = sqlite3.connect(path / db)
+    try:
+        results = pd.read_sql(search_expr, conn)['path'].tolist()
+    finally:
+        # Close explicitly so the connection is released even if the query fails.
+        conn.close()
+    _check_db2paths(path, results)
 
 
 def full_integrity_check(path: Path) -> None:
     check_db_integrity(path)
+    check_db_file_links(path)
     print("Full:\t✅")
 
 