diff --git a/corrlib/integrity.py b/corrlib/integrity.py index dc1216c..5f80aa3 100644 --- a/corrlib/integrity.py +++ b/corrlib/integrity.py @@ -3,6 +3,10 @@ from pathlib import Path from .tools import get_db_file import pandas as pd import sqlite3 +from .tracker import get +import pyerrors.input.json as pj + +from typing import Any def has_valid_times(result: pd.Series) -> bool: @@ -38,10 +42,46 @@ def check_db_integrity(path: Path) -> None: if not has_valid_times(result): raise ValueError(f"Result with id {result[id]} has wrong time signatures.") print("DB:\t✅") + return + + +def _check_db2paths(path: Path, meas_paths: list[str]) -> None: + needed_data: dict[str, list[str]] = {} + for mpath in meas_paths: + file = mpath.split("::")[0] + if file not in needed_data.keys(): + needed_data[file] = [] + key = mpath.split("::")[1] + needed_data[file].append(key) + + totf = len(needed_data.keys()) + for i, file in enumerate(needed_data.keys()): + print(f"Check against file {i}/{totf}: {file}") + get(path, Path(file)) + filedict: dict[str, Any] = pj.load_json_dict(str(path / file)) + if not set(filedict.keys()).issubset(needed_data[file]): + for key in filedict.keys(): + if key not in needed_data[file]: + raise ValueError(f"Found unintended key {key} in file {file}.") + if not set(needed_data[file]).issubset(filedict.keys()): + for key in needed_data[file]: + if key not in filedict.keys(): + raise ValueError(f"Did not find data for key {key} that should be in file {file}.") + print("Links:\t✅") + return + + +def check_db_file_links(path: Path) -> None: + db = get_db_file(path) + search_expr = "SELECT path FROM 'backlogs'" + conn = sqlite3.connect(path / db) + results = pd.read_sql(search_expr, conn)['path'].values + _check_db2paths(path, list(results)) def full_integrity_check(path: Path) -> None: check_db_integrity(path) + check_db_file_links(path) print("Full:\t✅") diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index f4e8a83..cbd9386 100644 --- 
a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -11,6 +11,7 @@ from .tracker import get, save, unlock import shutil from typing import Any from pathlib import Path +from .integrity import _check_db2paths CACHE_DIR = ".cache" @@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]: return load_records(path, [meas_path])[0] -def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: +def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. @@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = Path of the correlator library. meas_paths: list[str] A list of the paths to the correlator in the backlog system. - perloaded: dict[str, Any] - The data that is already prelaoded. Of interest if data has alread been loaded in the same script. + preloaded: dict[str, Any] + The data that is already preloaded. Of interest if data has already been loaded in the same script. + dry_run: bool + Do not load data, just check whether we can reach the data we are interested in. Returns ------- - retruned_data: list + returned_data: list The loaded records. """ + if dry_run: + _check_db2paths(path, meas_paths) + return [] needed_data: dict[str, list[str]] = {} for mpath in meas_paths: file = mpath.split("::")[0]