corrlib/corrlib/integrity.py

84 lines
2.7 KiB
Python

import datetime as dt
import sqlite3
from pathlib import Path
from typing import Any

import pandas as pd
import pyerrors.input.json as pj

from .tools import get_db_file
from .tracker import get
def has_valid_times(result: pd.Series) -> bool:
    """Check that the timestamps of a record are chronologically plausible.

    The expected ordering is ``created_at <= updated_at <= now``.

    Parameters
    ----------
    result : pd.Series
        Record providing ISO-8601 strings under the keys ``'created_at'``
        and ``'updated_at'``.

    Returns
    -------
    bool
        ``True`` if the ordering holds, ``False`` otherwise.
    """
    created_at = dt.datetime.fromisoformat(result['created_at'])
    updated_at = dt.datetime.fromisoformat(result['updated_at'])
    if created_at > updated_at:
        return False
    # Take "now" in the timezone of the stored stamp: for naive stamps
    # (tzinfo is None) this is the naive local time, for aware stamps it is
    # an aware datetime, so the comparison never mixes naive and aware values.
    now = dt.datetime.now(tz=updated_at.tzinfo)
    return updated_at <= now
def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """Check whether all entries of one column of a table are distinct.

    Parameters
    ----------
    db : Path
        Path to the SQLite database file.
    table : str
        Name of the table to inspect.
    col : str
        Name of the column whose entries should be unique.

    Returns
    -------
    bool
        ``True`` if the number of distinct entries (compared as text) equals
        the number of entries in ``col``, ``False`` otherwise.
    """
    # Identifiers cannot be bound as SQL parameters, so ``table`` and ``col``
    # are interpolated into the statement; they must come from trusted code.
    # Both counts refer to ``col`` -- counting distinct values of a fixed
    # column would give wrong answers for every other column.
    query = (
        f"SELECT COUNT(DISTINCT CAST({col} AS nvarchar(4000))), "
        f"COUNT({col}) FROM {table};"
    )
    conn = sqlite3.connect(db)
    try:
        # An aggregate query without GROUP BY yields exactly one row.
        n_distinct, n_total = conn.execute(query).fetchone()
    finally:
        # Release the connection even if the query fails.
        conn.close()
    return bool(n_distinct == n_total)
def check_db_integrity(path: Path) -> None:
    """Check the internal consistency of the ``backlogs`` table.

    Two properties are verified: the entries of the ``path`` column are
    unique, and every row has plausible ``created_at``/``updated_at`` stamps.
    On success a status line is printed.

    Parameters
    ----------
    path : Path
        Directory of the library; the database file is located relative to
        it via ``get_db_file``.

    Raises
    ------
    Exception
        If the entries of the ``path`` column are not unique.
    ValueError
        If a row fails the timestamp check.
    """
    db = get_db_file(path)
    if not are_keys_unique(path / db, 'backlogs', 'path'):
        raise Exception("The paths the backlog table of the database links are not unique.")
    search_expr = "SELECT * FROM 'backlogs'"
    conn = sqlite3.connect(path / db)
    try:
        results = pd.read_sql(search_expr, conn)
    finally:
        # The rows are fully loaded into the DataFrame; release the handle.
        conn.close()
    for _, result in results.iterrows():
        if not has_valid_times(result):
            # Index with the string 'id', not the builtin ``id`` function.
            # NOTE(review): assumes the backlogs table has an 'id' column.
            raise ValueError(f"Result with id {result['id']} has wrong time signatures.")
    print("DB:\t")
def _check_db2paths(path: Path, meas_paths: list[str]) -> None:
    """Check that the data files hold exactly the keys that are referenced.

    Every entry of ``meas_paths`` is split at ``"::"`` into a file name and a
    key. For each file, the keys of the loaded JSON dictionary have to
    coincide with the keys referenced for that file. On success a status
    line is printed.

    Parameters
    ----------
    path : Path
        Directory relative to which the file names are resolved.
    meas_paths : list[str]
        Entries of the form ``"<file>::<key>"``.

    Raises
    ------
    ValueError
        If an entry does not contain ``"::"``, if a file contains a key that
        is not referenced, or if a referenced key is missing from its file.
    """
    # Group the referenced keys by the file they are expected in.
    needed_data: dict[str, list[str]] = {}
    for mpath in meas_paths:
        parts = mpath.split("::")
        if len(parts) < 2:
            raise ValueError(f"Measurement path {mpath} is not of the form '<file>::<key>'.")
        needed_data.setdefault(parts[0], []).append(parts[1])

    for file, needed_keys in needed_data.items():
        # NOTE(review): presumably makes the file content available locally
        # before it is read -- confirm against the tracker module.
        get(path, Path(file))
        filedict: dict[str, Any] = pj.load_json_dict(str(path / file))
        # Set for O(1) membership tests; the list keeps the reporting order.
        needed = set(needed_keys)
        for key in filedict:
            if key not in needed:
                raise ValueError(f"Found unintended key {key} in file {file}.")
        for key in needed_keys:
            if key not in filedict:
                raise ValueError(f"Did not find data for key {key} that should be in file {file}.")
    print("Links:\t")
def check_db_file_links(path: Path) -> None:
    """Check the links between the ``backlogs`` table and the data files.

    Reads the ``path`` column of the ``backlogs`` table, prints the entries
    and hands them to ``_check_db2paths`` for verification.

    Parameters
    ----------
    path : Path
        Directory of the library; the database file is located relative to
        it via ``get_db_file``.
    """
    db = get_db_file(path)
    search_expr = "SELECT path FROM 'backlogs'"
    conn = sqlite3.connect(path / db)
    try:
        results = pd.read_sql(search_expr, conn)['path'].values
    finally:
        # The column is fully loaded; release the database handle.
        conn.close()
    print(results)
    # Pass a plain list, matching the ``list[str]`` annotation of the helper.
    _check_db2paths(path, list(results))
def full_integrity_check(path: Path) -> None:
    """Run all integrity checks for the library located at ``path``.

    First the database table itself is checked, then the links between the
    database and the data files. Exceptions raised by either check propagate
    to the caller; a status line is printed once both have passed.

    Parameters
    ----------
    path : Path
        Directory of the library, handed unchanged to each check.
    """
    for run_check in (check_db_integrity, check_db_file_links):
        run_check(path)
    print("Full:\t")