def are_keys_unique(db: Path, table: str, col: str) -> bool:
    """Check that column ``col`` of ``table`` contains no duplicate values.

    The number of distinct values (compared after a cast to text) is set
    against the number of non-NULL values in the same column; the two agree
    exactly when no value occurs twice. NULL entries are ignored by both
    counts, so an empty table is reported as unique.

    Args:
        db: Path to the SQLite database file.
        table: Name of the table to inspect.
        col: Name of the column whose values are expected to be unique.

    Returns:
        True if no non-NULL value of ``col`` occurs more than once.

    Raises:
        sqlite3.Error: If the query fails, e.g. for an unknown table or
            column.
    """
    # Identifiers cannot be bound as SQL parameters, so ``table`` and ``col``
    # are interpolated verbatim -- only pass trusted, hard-coded names.
    # Both counts must refer to the same column: the DISTINCT side used to be
    # hard-coded to ``path`` and silently ignored ``col``.
    query = (
        f"SELECT COUNT(DISTINCT CAST({col} AS nvarchar(4000))), COUNT({col}) "
        f"FROM {table};"
    )
    conn = sqlite3.connect(db)
    try:
        distinct_count, total_count = conn.execute(query).fetchone()
    finally:
        # Release the database handle even when the query raises.
        conn.close()
    return bool(distinct_count == total_count)