From 4a4ad52ed7edc1371e7fbf620cf73741e14e39c5 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Tue, 2 Sep 2025 12:36:25 +0000 Subject: [PATCH 01/13] now with proper plural :) --- corrlib/find.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/find.py b/corrlib/find.py index 0c5f9a6..2b9973f 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -148,7 +148,7 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after, revision=revision) if code == "sfcf": results = sfcf_filter(results, **kwargs) - print("Found " + str(len(results)) + " results") + print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else "")) return results.reset_index() From 140ec92cbe857e588fc70885d0425e5a0f0e0f94 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:14:12 +0100 Subject: [PATCH 02/13] add example toml-file --- examples/example_import.toml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 examples/example_import.toml diff --git a/examples/example_import.toml b/examples/example_import.toml new file mode 100644 index 0000000..f0965fc --- /dev/null +++ b/examples/example_import.toml @@ -0,0 +1,28 @@ +['project'] +url = "git@kuhl-mann.de:lattice/cA_data.git" +code = "sfcf" +[measurements] +[measurements.a] + path = "/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] +[measurements.b] + path = "/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] +[measurements.c] + path = "/path/to/measurement" + ensemble = "A1k1" + param_file = "/path/to/file" + version = "1.1" + prefix = "pref" + cfg_seperator = "n" + names = ['list', 'of', 'names'] From 895b8e35ab14e7bfca8a6611a84af0cfde883304 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:15:40 +0100 Subject: [PATCH 03/13] add drop function for datalad project --- corrlib/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/corrlib/main.py b/corrlib/main.py index defe03a..460ed8b 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -134,3 +134,11 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio # make this more concrete return uuid + + +def drop_project_data(path: str, uuid: str, path_in_project: str = ""): + """ + Drop (parts of) a prject to free up diskspace + """ + dl.drop(path + "/projects/" + uuid + "/" + path_in_project) + From a5ca159be04998a8c1c0b67bfeed833122e6d45d Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:17:53 +0100 Subject: [PATCH 04/13] add small test for the check meas data function --- tests/import_project_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/import_project_test.py diff --git a/tests/import_project_test.py b/tests/import_project_test.py new file mode 100644 index 0000000..ec8272c --- /dev/null +++ b/tests/import_project_test.py @@ -0,0 +1,17 @@ +import corrlib.toml as t + + +def test_toml_check_measurement_data(): + measurements = { + "a": + { + "path": "/path/to/measurement", + "ensemble": "A1k1", + "param_file": "/path/to/file", + "version": "1.1", + "prefix": "pref", + "cfg_seperator": "n", + "names": ['list', 'of', 'names'] + } + } + t.check_measurement_data(measurements) From 11880215ca1568e485fbf12bb2488f78ad863b23 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:18:44 +0100 Subject: [PATCH 05/13] remove test.ipynb from commitable files --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1530e48..e7385f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ pyerrors_corrlib.egg-info __pycache__ -*.egg-info \ No newline at end of file +*.egg-info +test.ipynb From f271d431c085e70055f1681f28d447fb93b865f6 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:22:29 +0100 Subject: [PATCH 06/13] add pyproject.toml --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..ed2df7b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = ["setuptools >= 63.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.ruff.lint] +ignore = ["F403"] \ No newline at end of file From a8d3a335a5bdf70b0f532b0069fd4b192d5e14cb Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 20 Nov 2025 17:36:18 +0100 Subject: [PATCH 07/13] remove projects, which was cluttering --- projects/tmp | 1 - 1 file changed, 1 deletion(-) delete mode 160000 projects/tmp diff --git a/projects/tmp b/projects/tmp deleted file mode 160000 index 216fe4e..0000000 --- a/projects/tmp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 216fe4ed3467ed486390735f8072856cf3d0a409 From 2940ee9055236a05d26d369b806a67bedfcd9aac Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 09:42:40 +0100 Subject: [PATCH 08/13] ensure db get before query --- corrlib/find.py | 7 ++++--- corrlib/main.py | 18 ++++++++++++------ corrlib/meas_io.py | 2 ++ corrlib/tools.py | 1 - 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index 2b9973f..e0772a1 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -143,8 +143,7 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= db = path + '/backlogger.db' if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) - if os.path.exists(db): - dl.get(db, dataset=path) + dl.get(db, dataset=path) results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after, revision=revision) if code == "sfcf": results = sfcf_filter(results, **kwargs) @@ -152,12 +151,14 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= return results.reset_index() -def find_project(db, name): +def find_project(path, db, name): + dl.get(db, dataset=path) return _project_lookup_by_alias(db, name) def list_projects(path): db = path + '/backlogger.db' + dl.get(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() c.execute("SELECT id,aliases FROM projects") diff --git a/corrlib/main.py b/corrlib/main.py index 460ed8b..ebc923e 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -24,13 +24,15 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni code: str (optional) The code that was used to create the measurements. """ - conn = sqlite3.connect(path + "/backlogger.db") + db = path + "/backlogger.db" + dl.get(db, dataset=path) + conn = sqlite3.connect(db) c = conn.cursor() known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,)) if known_projects.fetchone(): raise ValueError("Project already imported, use update_project() instead.") - dl.unlock(path + "/backlogger.db", dataset=path) + dl.unlock(db, dataset=path) alias_str = None if aliases is not None: alias_str = list2str(aliases) @@ -40,10 +42,11 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni c.execute("INSERT INTO projects (id, aliases, customTags, owner, code, created_at, updated_at) VALUES (?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (uuid, alias_str, tag_str, owner, code)) conn.commit() conn.close() - dl.save(path + "/backlogger.db", message="Added entry for project " + uuid + " to database", dataset=path) + dl.save(db, message="Added entry for project " + uuid + " to database", dataset=path) -def update_project_data(db, uuid, prop, value = None): +def update_project_data(path, db, uuid, prop, value = None): + dl.get(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() c.execute(f"UPDATE projects SET '{prop}' = '{value}' WHERE id == '{uuid}'") @@ -54,6 +57,7 @@ def update_project_data(db, uuid, prop, value = None): def update_aliases(path: str, uuid: str, aliases: list[str]): db = os.path.join(path, "backlogger.db") + dl.get(db, dataset=path) known_data = _project_lookup_by_id(db, uuid)[0] known_aliases = known_data[1] @@ -117,11 +121,13 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio if not uuid: raise ValueError("The dataset does not have a uuid!") if not os.path.exists(path + "/projects/" + uuid): - dl.unlock(path + "/backlogger.db", dataset=path) + db = path + "/backlogger.db" + dl.get(db, ds=path) + dl.unlock(db, dataset=path) create_project(path, uuid, owner, tags, aliases, code) move_submodule(path, 'projects/tmp', 'projects/' + uuid) os.mkdir(path + '/import_scripts/' + uuid) - dl.save([path + "/backlogger.db", path + '/projects/' + uuid], message="Import project from " + url, dataset=path) + dl.save([db, path + '/projects/' + uuid], message="Import project from " + url, dataset=path) else: dl.drop(tmp_path, reckless='kill') shutil.rmtree(tmp_path) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 1a06dac..b98eb6e 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -28,6 +28,7 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file=No The uuid of the project. """ db = os.path.join(path, 'backlogger.db') + dl.get(db, ds=path) dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() @@ -176,6 +177,7 @@ def drop_record(path: str, meas_path: str): file_in_archive = meas_path.split("::")[0] file = os.path.join(path, file_in_archive) db = os.path.join(path, 'backlogger.db') + dl.get(db, ds=path) sub_key = meas_path.split("::")[1] dl.unlock(db, dataset=path) conn = sqlite3.connect(db) diff --git a/corrlib/tools.py b/corrlib/tools.py index 3ac8bfe..da5acdd 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,6 +1,5 @@ - def str2list(string): return string.split(",") From 99b4792675adbf4c77e84b95447c1a177ff619a3 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 09:52:03 +0100 Subject: [PATCH 09/13] bump version to 0.1.5 --- corrlib/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/version.py b/corrlib/version.py index bbab024..1276d02 100644 --- a/corrlib/version.py +++ b/corrlib/version.py @@ -1 +1 @@ -__version__ = "0.1.4" +__version__ = "0.1.5" From f3b0b0268c8db9e286f7d41fc5660ea7b53d4f47 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Fri, 21 Nov 2025 09:55:39 +0100 Subject: [PATCH 10/13] bump version to 0.2.3 --- corrlib/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corrlib/version.py b/corrlib/version.py index 1276d02..d31c31e 100644 --- a/corrlib/version.py +++ b/corrlib/version.py @@ -1 +1 @@ -__version__ = "0.1.5" +__version__ = "0.2.3" From d8ec6ef40e30f7bb5545d17b230805f23569dcd5 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 27 Nov 2025 11:16:00 +0100 Subject: [PATCH 11/13] communicate when files are downloaded --- corrlib/meas_io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index b98eb6e..49b95cf 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -168,8 +168,11 @@ def cache_path(path, file, key): def preload(path: str, file: str): + print("Loading data...") dl.get(os.path.join(path, file), dataset=path) + print("> downloaded file") filedict = pj.load_json_dict(os.path.join(path, file)) + print("> read file") return filedict From 0f499f080a618a77598d713bcba23c44882e641e Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 27 Nov 2025 15:25:54 +0100 Subject: [PATCH 12/13] add thin wrapper for getting files --- corrlib/find.py | 12 ++++++------ corrlib/main.py | 16 ++++++++-------- corrlib/meas_io.py | 10 ++++------ corrlib/tools.py | 9 +++++++++ 4 files changed, 27 insertions(+), 20 deletions(-) diff --git a/corrlib/find.py b/corrlib/find.py index e0772a1..3e73240 100644 --- a/corrlib/find.py +++ b/corrlib/find.py @@ -5,7 +5,7 @@ import json import pandas as pd import numpy as np from .input.implementations import codes -from .tools import k2m +from .tools import k2m, get_file # this will implement the search functionality @@ -143,7 +143,7 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= db = path + '/backlogger.db' if code not in codes: raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) - dl.get(db, dataset=path) + get_file(path, "backlogger.db") results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after, revision=revision) if code == "sfcf": results = sfcf_filter(results, **kwargs) @@ -151,14 +151,14 @@ def find_record(path, ensemble, correlator_name, code, project=None, parameters= return results.reset_index() -def find_project(path, db, name): - dl.get(db, dataset=path) - return _project_lookup_by_alias(db, name) +def find_project(path, name): + get_file(path, "backlogger.db") + return _project_lookup_by_alias(os.path.join(path, "backlogger.db"), name) def list_projects(path): db = path + '/backlogger.db' - dl.get(db, dataset=path) + get_file(path, "backlogger.db") conn = sqlite3.connect(db) c = conn.cursor() c.execute("SELECT id,aliases FROM projects") diff --git a/corrlib/main.py b/corrlib/main.py index ebc923e..fc7663f 100644 --- a/corrlib/main.py +++ b/corrlib/main.py @@ -5,7 +5,7 @@ import os from .git_tools import move_submodule import shutil from .find import _project_lookup_by_id -from .tools import list2str, str2list +from .tools import list2str, str2list, get_file from typing import Union @@ -25,7 +25,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni The code that was used to create the measurements. """ db = path + "/backlogger.db" - dl.get(db, dataset=path) + get_file(path, "backlogger.db") conn = sqlite3.connect(db) c = conn.cursor() known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,)) @@ -45,9 +45,9 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni dl.save(db, message="Added entry for project " + uuid + " to database", dataset=path) -def update_project_data(path, db, uuid, prop, value = None): - dl.get(db, dataset=path) - conn = sqlite3.connect(db) +def update_project_data(path, uuid, prop, value = None): + get_file(path, "backlogger.db") + conn = sqlite3.connect(os.path.join(path, "backlogger.db")) c = conn.cursor() c.execute(f"UPDATE projects SET '{prop}' = '{value}' WHERE id == '{uuid}'") conn.commit() @@ -57,7 +57,7 @@ def update_project_data(path, db, uuid, prop, value = None): def update_aliases(path: str, uuid: str, aliases: list[str]): db = os.path.join(path, "backlogger.db") - dl.get(db, dataset=path) + get_file(path, "backlogger.db") known_data = _project_lookup_by_id(db, uuid)[0] known_aliases = known_data[1] @@ -77,7 +77,7 @@ def update_aliases(path: str, uuid: str, aliases: list[str]): if not len(new_alias_list) == len(known_alias_list): alias_str = list2str(new_alias_list) dl.unlock(db, dataset=path) - update_project_data(db, uuid, "aliases", alias_str) + update_project_data(path, uuid, "aliases", alias_str) dl.save(db, dataset=path) return @@ -122,7 +122,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Unio raise ValueError("The dataset does not have a uuid!") if not os.path.exists(path + "/projects/" + uuid): db = path + "/backlogger.db" - dl.get(db, ds=path) + get_file(path, "backlogger.db") dl.unlock(db, dataset=path) create_project(path, uuid, owner, tags, aliases, code) move_submodule(path, 'projects/tmp', 'projects/' + uuid) diff --git a/corrlib/meas_io.py b/corrlib/meas_io.py index 49b95cf..7122ca0 100644 --- a/corrlib/meas_io.py +++ b/corrlib/meas_io.py @@ -7,7 +7,7 @@ import json from typing import Union from pyerrors import Obs, Corr, dump_object, load_object from hashlib import sha256 -from .tools import cached +from .tools import cached, get_file import shutil @@ -28,7 +28,7 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file=No The uuid of the project. """ db = os.path.join(path, 'backlogger.db') - dl.get(db, ds=path) + get_file(path, "backlogger.db") dl.unlock(db, dataset=path) conn = sqlite3.connect(db) c = conn.cursor() @@ -168,9 +168,7 @@ def cache_path(path, file, key): def preload(path: str, file: str): - print("Loading data...") - dl.get(os.path.join(path, file), dataset=path) - print("> downloaded file") + get_file(path, file) filedict = pj.load_json_dict(os.path.join(path, file)) print("> read file") return filedict @@ -180,7 +178,7 @@ def drop_record(path: str, meas_path: str): file_in_archive = meas_path.split("::")[0] file = os.path.join(path, file_in_archive) db = os.path.join(path, 'backlogger.db') - dl.get(db, ds=path) + get_file(path, 'backlogger.db') sub_key = meas_path.split("::")[1] dl.unlock(db, dataset=path) conn = sqlite3.connect(db) diff --git a/corrlib/tools.py b/corrlib/tools.py index da5acdd..c320150 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -1,3 +1,5 @@ +import os +import datalad.api as dl def str2list(string): @@ -15,3 +17,10 @@ def m2k(m): def k2m(k): return (1/(2*k))-4 + + +def get_file(path, file): + print("Loading data...") + dl.get(os.path.join(path, file), dataset=path) + print("> downloaded file") + \ No newline at end of file From 1e2f419243c369ae0092a2884b26aa76b4003171 Mon Sep 17 00:00:00 2001 From: Justus Kuhlmann Date: Thu, 27 Nov 2025 15:27:41 +0100 Subject: [PATCH 13/13] tell the user when the database is downloading --- corrlib/tools.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/corrlib/tools.py b/corrlib/tools.py index c320150..14bfc05 100644 --- a/corrlib/tools.py +++ b/corrlib/tools.py @@ -20,7 +20,10 @@ def k2m(k): def get_file(path, file): - print("Loading data...") + if file == "backlogger.db": + print("Downloading database...") + else: + print("Downloading data...") dl.get(os.path.join(path, file), dataset=path) print("> downloaded file") \ No newline at end of file