From 50ff178a0cfd4979c68885cd6249c9a2820ac517 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 2 Sep 2025 10:30:56 +0000 Subject: [PATCH 01/13] import sha1 as groundlayer --- corrlib/meas_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 1a06dac..fca5671 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -6,7 +6,7 @@ from .input import sfcf,openQCD import json from typing import Union from pyerrors import Obs, Corr, dump_object, load_object -from hashlib import sha256 +from hashlib import sha256, sha1 from .tools import cached import shutil From 5e87f569e2a8c085fe942c6774d22039a985056f Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 7 Nov 2025 09:43:58 +0000 Subject: [PATCH 02/13] update TODO --- TODO.md | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/TODO.md b/TODO.md index 4153fc3..85074f8 100644 --- a/TODO.md +++ b/TODO.md @@ -1,14 +1,17 @@ # TODO ## Features -- implement import of non-datalad projects -- implement a way to use another backlog repo as a project - -- find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project - - this could e.g. be done along the lines of mandatory documentation -- keep better track of the versions of the code, that was used for a specific measurement. - - maybe let this be an input in the project file? - - git repo and commit hash/version tag - +- [ ] implement import of non-datalad projects +- [ ] implement a way to use another backlog repo as a project +- [ ] make cache deadlock resistent (no read while writing) +- [ ] find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project + - [ ] this could e.g. be done along the lines of mandatory documentation +- [ ] keep better track of the versions of the code, that was used for a specific measurement. + - [ ] maybe let this be an input in the project file? + - [ ] git repo and commit hash/version tag + - [ ] implement a code table? +- [ ] parallel processing of measurements +- [ ] extra SQL table for ensembles with UUID and aliases ## Bugfixes - [ ] revisit the reimport function for single files +- [ ] drop record needs to look if no records are left in a json file. From 085256857d5f3cefc413a6a4364354b886f1e419 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:07:16 +0100 Subject: [PATCH 03/13] add test notebook to ignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1530e48..e7385f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ pyerrors_corrlib.egg-info __pycache__ -*.egg-info \ No newline at end of file +*.egg-info +test.ipynb From 73d7687359b4d8f1b5e8351f6700fff8be0a6b4e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:08:03 +0100 Subject: [PATCH 04/13] update TODO and README --- README.md | 9 +++++++++ TODO.md | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/README.md b/README.md index 976ae57..0f6c9a3 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,12 @@ This is done in a reproducible way using `datalad`. In principle, a dataset is created, that is automatically administered by the backlogger, in which data from differnt projects are held together. Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements. The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methonds of pyerrors. + +We work with the following nomenclature in this project: +- Measurement + A setis of Observables, including the appropriate metadata. +- Project + A series of measurements that was done by one person as part of their research. +- Record + An entry of a single Correlator in the database of the backlogger. +- \ No newline at end of file diff --git a/TODO.md b/TODO.md index 85074f8..ba32ec9 100644 --- a/TODO.md +++ b/TODO.md @@ -15,3 +15,7 @@ ## Bugfixes - [ ] revisit the reimport function for single files - [ ] drop record needs to look if no records are left in a json file. + +## Rough Ideas +- [ ] multitable could provide a high speed implementation of an HDF5 based format +- [ ] implement also a way to include compiled binaries in the archives. From 91c7a9d95de7e5fb9be4f357798158226f31240a Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:11:37 +0100 Subject: [PATCH 05/13] clean up --- projects/tmp | 1 - tests/test_import_project.py | 0 2 files changed, 1 deletion(-) delete mode 160000 projects/tmp delete mode 100644 tests/test_import_project.py diff --git a/projects/tmp b/projects/tmp deleted file mode 160000 index 216fe4e..0000000 --- a/projects/tmp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 216fe4ed3467ed486390735f8072856cf3d0a409 diff --git a/tests/test_import_project.py b/tests/test_import_project.py deleted file mode 100644 index e69de29..0000000 From 4f3e78177e0497ff344f4d949cb8af8cbfb6c832 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:13:17 +0100 Subject: [PATCH 06/13] refactor io --- corrlib/cache_io.py | 33 +++++++++++++++++++++++++++++++++ corrlib/meas_io.py | 31 +++++++------------------------ corrlib/tools.py | 6 ++++++ 3 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 corrlib/cache_io.py diff --git a/corrlib/cache_io.py b/corrlib/cache_io.py new file mode 100644 index 0000000..4d1d632 --- /dev/null +++ b/corrlib/cache_io.py @@ -0,0 +1,33 @@ +from typing import Union, Optional +import os +import shutil + + +def drop_cache_files(path: str, fs: Optional[list[str]]=None): + cache_dir = os.path.join(path, ".cache") + if fs is None: + fs = os.listdir(cache_dir) + for f in fs: + shutil.rmtree(os.path.join(cache_dir, f)) + + +def cache_dir(path, file): + cache_path_list = [path] + cache_path_list.append(".cache") + cache_path_list.extend(file.split("/")[1:]) + cache_path = "/".join(cache_path_list) + return cache_path + + +def cache_path(path, file, hash, key): + cache_path = os.path.join(cache_dir(path, file), hash, key) + return cache_path + +def is_in_cache(path, record, hash): + + if os.file.exists(cache_path(path, file, hash, key)): + return True + else: + return False + + \ No newline at end of file diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index fca5671..ad9a6e8 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -4,14 +4,15 @@ import datalad.api as dl import sqlite3 from .input import sfcf,openQCD import json -from typing import Union +from typing import Union, Optional from pyerrors import Obs, Corr, dump_object, load_object from hashlib import sha256, sha1 -from .tools import cached +from .tools import cached, record2name_key import shutil +from .caching import cache_path, cache_dir -def write_measurement(path, ensemble, measurement, uuid, code, parameter_file=None): +def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None): """ Write a measurement to the backlog. If the file for the measurement already exists, update the measurement. @@ -115,7 +116,7 @@ def load_record(path: str, meas_path: str): return load_records(path, [meas_path])[0] -def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union[Corr, Obs]]: +def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Union[Corr, Obs]]: """ Load a list of records by their paths. @@ -131,11 +132,10 @@ def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union List """ needed_data: dict[str, list[str]] = {} - for mpath in meas_paths: - file = mpath.split("::")[0] + for rpath in record_paths: + file, key = record2name_key(rpath) if file not in needed_data.keys(): needed_data[file] = [] - key = mpath.split("::")[1] needed_data[file].append(key) returned_data: list = [] for file in needed_data.keys(): @@ -153,19 +153,6 @@ def load_records(path: str, meas_paths: list[str], preloaded = {}) -> list[Union return returned_data -def cache_dir(path, file): - cache_path_list = [path] - cache_path_list.append(".cache") - cache_path_list.extend(file.split("/")[1:]) - cache_path = "/".join(cache_path_list) - return cache_path - - -def cache_path(path, file, key): - cache_path = os.path.join(cache_dir(path, file), key) - return cache_path - - def preload(path: str, file: str): dl.get(os.path.join(path, file), dataset=path) filedict = pj.load_json_dict(os.path.join(path, file)) @@ -196,7 +183,3 @@ def drop_record(path: str, meas_path: str): else: raise ValueError("This measurement does not exist as a file!") -def drop_cache(path: str): - cache_dir = os.path.join(path, ".cache") - for f in os.listdir(cache_dir): - shutil.rmtree(os.path.join(cache_dir, f)) diff --git a/corrlib/tools.py b/corrlib/tools.py index 3ac8bfe..44697cc 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -16,3 +16,9 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 + + +def record2name_key(record_path: str): + file = record_path.split("::")[0] + key = record_path.split("::")[1] + return file, key \ No newline at end of file From a080ca835f7d22299bb7c21b6d9dea710549aaf3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 11:53:25 +0100 Subject: [PATCH 07/13] write wrapper hash method --- corrlib/tools.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index 44697cc..70eb518 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,5 +1,5 @@ - - +import hashlib +import os def str2list(string): return string.split(",") @@ -21,4 +21,11 @@ def k2m(k): def record2name_key(record_path: str): file = record_path.split("::")[0] key = record_path.split("::")[1] - return file, key \ No newline at end of file + return file, key + + +def make_version_hash(path, record): + file, key = record2name_key(record) + with open(os.path.join(path, file), 'rb') as fp: + file_hash = hashlib.file_digest(fp, 'sha1').hexdigest() + return file_hash From f47a9caae7c55dbcb83c430262b458b5ef78e33c Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:46:15 +0100 Subject: [PATCH 08/13] include a test dataset --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e7385f6..dd21dea 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ pyerrors_corrlib.egg-info __pycache__ *.egg-info test.ipynb +test_ds From df71ee5ad9f2083668f2a2c75831858917ba1626 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:47:45 +0100 Subject: [PATCH 09/13] add gitpython --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index add6910..6b8794e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup(name='pycorrlib', version=version['__version__'], author='Justus Kuhlmann', author_email='j_kuhl19@uni-muenster.de', - install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5'], + install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'], entry_points = { 'console_scripts': ['pcl=corrlib.cli:app'], }, From c9fe09d9d6f0baa1a8cb54e405a3bae9ade91783 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 21:50:59 +0100 Subject: [PATCH 10/13] add functionality to automatically register when cache has an old version of an archived file --- corrlib/__init__.py | 1 + corrlib/cache_io.py | 45 ++++++++++++++++++++++++++++++--------- corrlib/cli.py | 4 ++-- corrlib/initialization.py | 3 ++- corrlib/meas_io.py | 41 ++++++++++++++++++++++------------- 5 files changed, 66 insertions(+), 28 deletions(-) diff --git a/corrlib/__init__.py b/corrlib/__init__.py index 91b07f4..41d8691 100644 --- a/corrlib/__init__.py +++ b/corrlib/__init__.py @@ -19,5 +19,6 @@ from .main import * from .import input as input from .initialization import * from .meas_io import * +from .cache_io import * from .find import * from .version import __version__ diff --git a/corrlib/cache_io.py b/corrlib/cache_io.py index 4d1d632..c890164 100644 --- a/corrlib/cache_io.py +++ b/corrlib/cache_io.py @@ -1,6 +1,19 @@ from typing import Union, Optional import os import shutil +from .tools import record2name_key +from pyerrors import dump_object +import datalad.api as dl +import sqlite3 + + +def get_version_hash(path, record): + db = os.path.join(path, "backlogger.db") + dl.get(db, dataset=path) + conn = sqlite3.connect(db) + c = conn.cursor() + c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'") + return c.fetchall()[0][0] def drop_cache_files(path: str, fs: Optional[list[str]]=None): @@ -19,15 +32,27 @@ def cache_dir(path, file): return cache_path -def cache_path(path, file, hash, key): - cache_path = os.path.join(cache_dir(path, file), hash, key) +def cache_path(path, file, sha_hash, key): + cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash) return cache_path -def is_in_cache(path, record, hash): - - if os.file.exists(cache_path(path, file, hash, key)): - return True - else: - return False - - \ No newline at end of file + +def is_old_version(path, record): + version_hash = get_version_hash(path, record) + file, key = record2name_key(record) + meas_cache_path = os.path.join(cache_dir(path, file)) + ls = [] + for p, ds, fs in os.walk(meas_cache_path): + ls.extend(fs) + for filename in ls: + if key == filename.split("_")[0]: + if not version_hash == filename.split("_")[1][:-2]: + return True + else: + return False + + +def is_in_cache(path, record): + version_hash = get_version_hash(path, record) + file, key = record2name_key(record) + return os.path.exists(cache_path(path, file, version_hash, key) + ".p") diff --git a/corrlib/cli.py b/corrlib/cli.py index b808c13..44ede1b 100644 --- a/corrlib/cli.py +++ b/corrlib/cli.py @@ -6,7 +6,7 @@ from .toml import import_tomls, update_project, reimport_project from .find import find_record, list_projects from .tools import str2list from .main import update_aliases -from .meas_io import drop_cache as mio_drop_cache +from .cache_io import drop_cache_files as cio_drop_cache_files import os @@ -171,7 +171,7 @@ def drop_cache( """ Drop the currect cache directory of the dataset. """ - mio_drop_cache(path) + cio_drop_cache_files(path) return diff --git a/corrlib/initialization.py b/corrlib/initialization.py index f6ef5aa..e5c0ede 100644 --- a/corrlib/initialization.py +++ b/corrlib/initialization.py @@ -21,7 +21,8 @@ def _create_db(db): parameters TEXT, parameter_file TEXT, created_at TEXT, - updated_at TEXT)''') + updated_at TEXT, + current_version TEXT)''') c.execute('''CREATE TABLE IF NOT EXISTS projects (id TEXT PRIMARY KEY, aliases TEXT, diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index ad9a6e8..ff7cdc8 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -5,11 +5,10 @@ import sqlite3 from .input import sfcf,openQCD import json from typing import Union, Optional -from pyerrors import Obs, Corr, dump_object, load_object -from hashlib import sha256, sha1 -from .tools import cached, record2name_key -import shutil -from .caching import cache_path, cache_dir +from pyerrors import Obs, Corr, load_object, dump_object +from hashlib import sha256 +from .tools import cached, record2name_key, make_version_hash +from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None): @@ -79,11 +78,13 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O subkey = "/".join(par_list) subkeys = [subkey] pars[subkey] = json.dumps(parameters) - for subkey in subkeys: - parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + parHash - known_meas[parHash] = measurement[corr][subkey] + meas_paths = [] + for subkey in subkeys: + par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() + meas_path = file_in_archive + "::" + par_hash + meas_paths.append(meas_path) + known_meas[par_hash] = measurement[corr][subkey] if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) @@ -92,7 +93,12 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) conn.commit() pj.dump_dict_to_json(known_meas, file) - files.append(path + '/backlogger.db') + for meas_path in meas_paths: + version_hash = make_version_hash(path, meas_path) + print(version_hash) + c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? and name = ?", (version_hash, uuid, code, corr)) + conn.commit() + files.append(db) conn.close() dl.save(files, message="Add measurements to database", dataset=path) @@ -140,16 +146,21 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni returned_data: list = [] for file in needed_data.keys(): for key in list(needed_data[file]): - if os.path.exists(cache_path(path, file, key) + ".p"): - returned_data.append(load_object(cache_path(path, file, key) + ".p")) + record = file + "::" + key + current_version = get_version_hash(path, record) + if is_in_cache(path, record): + returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p")) else: if file not in preloaded: preloaded[file] = preload(path, file) returned_data.append(preloaded[file][key]) if cached: - if not os.path.exists(cache_dir(path, file)): - os.makedirs(cache_dir(path, file)) - dump_object(preloaded[file][key], cache_path(path, file, key)) + if not is_in_cache(path, record): + file, key = record2name_key(record) + if not os.path.exists(cache_dir(path, file)): + os.makedirs(cache_dir(path, file)) + current_version = get_version_hash(path, record) + dump_object(preloaded[file][key], cache_path(path, file, current_version, key)) return returned_data From 64579c477cd68e29cd059bf4750a026bc04e01c9 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 27 Nov 2025 11:07:55 +0100 Subject: [PATCH 11/13] better db call order --- corrlib/meas_io.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index ff7cdc8..b8695ca 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -6,7 +6,7 @@ from .input import sfcf,openQCD import json from typing import Union, Optional from pyerrors import Obs, Corr, load_object, dump_object -from hashlib import sha256 +from hashlib import sha256, sha1 from .tools import cached, record2name_key, make_version_hash from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash @@ -85,18 +85,12 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O meas_path = file_in_archive + "::" + par_hash meas_paths.append(meas_path) known_meas[par_hash] = measurement[corr][subkey] - - if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: - c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) - else: - c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", + data_hash = sha1(pj.create_json_string(measurement[corr][subkey]).encode('UTF-8')).hexdigest() + if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None: + c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) - conn.commit() + c.execute("UPDATE backlogs SET current_version = ?, updated_at = datetime('now') WHERE path = ?", (data_hash, meas_path)) pj.dump_dict_to_json(known_meas, file) - for meas_path in meas_paths: - version_hash = make_version_hash(path, meas_path) - print(version_hash) - c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? and name = ?", (version_hash, uuid, code, corr)) conn.commit() files.append(db) conn.close() From 5bd94633e86080e2763fd82b509bbccc6fa8b2c0 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 28 Nov 2025 16:42:50 +0100 Subject: [PATCH 12/13] centralize file and key to record concat and back --- corrlib/meas_io.py | 11 +++++------ corrlib/tools.py | 4 ++++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index b8695ca..a78f6f2 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -7,7 +7,7 @@ import json from typing import Union, Optional from pyerrors import Obs, Corr, load_object, dump_object from hashlib import sha256, sha1 -from .tools import cached, record2name_key, make_version_hash +from .tools import cached, record2name_key, name_key2record, make_version_hash from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash @@ -82,10 +82,10 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O meas_paths = [] for subkey in subkeys: par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() - meas_path = file_in_archive + "::" + par_hash + meas_path = name_key2record(file_in_archive, par_hash) meas_paths.append(meas_path) known_meas[par_hash] = measurement[corr][subkey] - data_hash = sha1(pj.create_json_string(measurement[corr][subkey]).encode('UTF-8')).hexdigest() + data_hash = make_version_hash(path, meas_path) if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None: c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) @@ -140,7 +140,7 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni returned_data: list = [] for file in needed_data.keys(): for key in list(needed_data[file]): - record = file + "::" + key + record = name_key2record(file, key) current_version = get_version_hash(path, record) if is_in_cache(path, record): returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p")) @@ -165,10 +165,9 @@ def preload(path: str, file: str): def drop_record(path: str, meas_path: str): - file_in_archive = meas_path.split("::")[0] + file_in_archive, sub_key = record2name_key(meas_path) file = os.path.join(path, file_in_archive) db = os.path.join(path, 'backlogger.db') - sub_key = meas_path.split("::")[1] dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() diff --git a/corrlib/tools.py b/corrlib/tools.py index 70eb518..43ab1ba 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -24,6 +24,10 @@ def record2name_key(record_path: str): return file, key +def name_key2record(name: str, key: str): + return name + "::" + key + + def make_version_hash(path, record): file, key = record2name_key(record) with open(os.path.join(path, file), 'rb') as fp: From 7e38d71b90e13927bea8d56511b8ff809ecde2dd Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 28 Nov 2025 16:57:15 +0100 Subject: [PATCH 13/13] re-add some tools functions --- corrlib/tools.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/corrlib/tools.py b/corrlib/tools.py index 337912e..e8a9c18 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -17,3 +17,20 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 + + +def record2name_key(record_path: str): + file = record_path.split("::")[0] + key = record_path.split("::")[1] + return file, key + + +def name_key2record(name: str, key: str): + return name + "::" + key + + +def make_version_hash(path, record): + file, key = record2name_key(record) + with open(os.path.join(path, file), 'rb') as fp: + file_hash = hashlib.file_digest(fp, 'sha1').hexdigest() + return file_hash