Merge pull request 'integ/links' (#34) from integ/links into develop
Some checks failed
Mypy / mypy (push) Failing after 39s
Pytest / pytest (3.12) (push) Failing after 38s
Pytest / pytest (3.13) (push) Failing after 37s
Pytest / pytest (3.14) (push) Failing after 40s
Ruff / ruff (push) Failing after 38s

Reviewed-on: https://www.kuhl-mann.de/git/git/jkuhl/corrlib/pulls/34
This commit is contained in:
Justus Kuhlmann 2026-04-17 18:09:17 +02:00
commit 702010c8fc
2 changed files with 50 additions and 4 deletions

View file

@ -3,6 +3,10 @@ from pathlib import Path
from .tools import get_db_file from .tools import get_db_file
import pandas as pd import pandas as pd
import sqlite3 import sqlite3
from .tracker import get
import pyerrors.input.json as pj
from typing import Any
def has_valid_times(result: pd.Series) -> bool: def has_valid_times(result: pd.Series) -> bool:
@ -38,10 +42,46 @@ def check_db_integrity(path: Path) -> None:
if not has_valid_times(result): if not has_valid_times(result):
raise ValueError(f"Result with id {result[id]} has wrong time signatures.") raise ValueError(f"Result with id {result[id]} has wrong time signatures.")
print("DB:\t") print("DB:\t")
return
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
    """
    Check that the given measurement paths can all be resolved in their
    backing files, and that those files contain no unregistered keys.

    Parameters
    ----------
    path: Path
        Path of the correlator library.
    meas_paths: list[str]
        Measurement paths of the form "<file>::<key>".

    Raises
    ------
    ValueError
        If a file contains a key that was not requested, or a requested
        key is missing from its file.
    """
    # Group the requested keys by the file that should contain them.
    needed_data: dict[str, list[str]] = {}
    for mpath in meas_paths:
        # Split once instead of twice per path.
        segments = mpath.split("::")
        file, key = segments[0], segments[1]
        if file not in needed_data:
            needed_data[file] = []
        needed_data[file].append(key)
    totf = len(needed_data)
    # start=1 so the progress counter reads "1/N" .. "N/N" instead of
    # the off-by-one "0/N" the 0-based index produced.
    for i, file in enumerate(needed_data, start=1):
        print(f"Check against file {i}/{totf}: {file}")
        get(path, Path(file))
        filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
        # Any key present in the file but never requested is an error.
        # (The former issubset() pre-checks were redundant: these loops
        # already raise exactly when the subset relation fails.)
        for key in filedict:
            if key not in needed_data[file]:
                raise ValueError(f"Found unintended key {key} in file {file}.")
        # Any requested key absent from the file is an error.
        for key in needed_data[file]:
            if key not in filedict:
                raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
    print("Links:\t")
def check_db_file_links(path: Path) -> None:
    """
    Verify that every measurement path recorded in the 'backlogs' table
    of the project database can be resolved to data on disk.

    Parameters
    ----------
    path: Path
        Path of the correlator library.

    Raises
    ------
    ValueError
        Propagated from _check_db2paths if a link cannot be resolved.
    """
    db = get_db_file(path)
    search_expr = "SELECT path FROM 'backlogs'"
    conn = sqlite3.connect(path / db)
    try:
        results = pd.read_sql(search_expr, conn)['path'].values
    finally:
        # The connection was previously never closed (resource leak).
        conn.close()
    _check_db2paths(path, list(results))
def full_integrity_check(path: Path) -> None: def full_integrity_check(path: Path) -> None:
check_db_integrity(path) check_db_integrity(path)
check_db_file_links(path)
print("Full:\t") print("Full:\t")

View file

@ -11,6 +11,7 @@ from .tracker import get, save, unlock
import shutil import shutil
from typing import Any from typing import Any
from pathlib import Path from pathlib import Path
from .integrity import _check_db2paths
CACHE_DIR = ".cache" CACHE_DIR = ".cache"
@ -153,7 +154,7 @@ def load_record(path: Path, meas_path: str) -> Union[Corr, Obs]:
return load_records(path, [meas_path])[0] return load_records(path, [meas_path])[0]
def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] = {}, dry_run: bool = False) -> list[Union[Corr, Obs]]:
""" """
Load a list of records by their paths. Load a list of records by their paths.
@ -163,14 +164,19 @@ def load_records(path: Path, meas_paths: list[str], preloaded: dict[str, Any] =
Path of the correlator library. Path of the correlator library.
meas_paths: list[str] meas_paths: list[str]
A list of the paths to the correlator in the backlog system. A list of the paths to the correlator in the backlog system.
perloaded: dict[str, Any] preloaded: dict[str, Any]
The data that is already prelaoded. Of interest if data has alread been loaded in the same script. The data that is already preloaded. Of interest if data has already been loaded in the same script.
dry_run: bool
Do not load data, just check whether we can reach the data we are interested in.
Returns Returns
------- -------
retruned_data: list returned_data: list
The loaded records. The loaded records.
""" """
if dry_run:
_check_db2paths(path, meas_paths)
return []
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for mpath in meas_paths: for mpath in meas_paths:
file = mpath.split("::")[0] file = mpath.split("::")[0]