Compare commits

No commits in common. "c9fe09d9d6f0baa1a8cb54e405a3bae9ade91783" and "4f3e78177e0497ff344f4d949cb8af8cbfb6c832" have entirely different histories.
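
Because the two commits share no common ancestor, the comparison below is a plain two-tree diff. Assuming both commits have been fetched locally, the same output can be reproduced with git, which diffs the two trees directly regardless of ancestry:

git diff c9fe09d9d6f0baa1a8cb54e405a3bae9ade91783 4f3e78177e0497ff344f4d949cb8af8cbfb6c832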

8 changed files with 32 additions and 78 deletions

.gitignore

@@ -2,4 +2,3 @@ pyerrors_corrlib.egg-info
 __pycache__
 *.egg-info
 test.ipynb
-test_ds

corrlib/__init__.py

@@ -19,6 +19,5 @@ from .main import *
 from .import input as input
 from .initialization import *
 from .meas_io import *
-from .cache_io import *
 from .find import *
 from .version import __version__

corrlib/cache_io.py → corrlib/caching.py

@@ -1,19 +1,6 @@
 from typing import Union, Optional
 import os
 import shutil
-from .tools import record2name_key
-from pyerrors import dump_object
-import datalad.api as dl
-import sqlite3
-
-
-def get_version_hash(path, record):
-    db = os.path.join(path, "backlogger.db")
-    dl.get(db, dataset=path)
-    conn = sqlite3.connect(db)
-    c = conn.cursor()
-    c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'")
-    return c.fetchall()[0][0]


 def drop_cache_files(path: str, fs: Optional[list[str]]=None):

@@ -32,27 +19,15 @@ def cache_dir(path, file):
     return cache_path


-def cache_path(path, file, sha_hash, key):
-    cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash)
+def cache_path(path, file, hash, key):
+    cache_path = os.path.join(cache_dir(path, file), hash, key)
     return cache_path


-def is_old_version(path, record):
-    version_hash = get_version_hash(path, record)
-    file, key = record2name_key(record)
-    meas_cache_path = os.path.join(cache_dir(path, file))
-    ls = []
-    for p, ds, fs in os.walk(meas_cache_path):
-        ls.extend(fs)
-    for filename in ls:
-        if key == filename.split("_")[0]:
-            if not version_hash == filename.split("_")[1][:-2]:
-                return True
-            else:
-                return False
-
-
-def is_in_cache(path, record):
-    version_hash = get_version_hash(path, record)
-    file, key = record2name_key(record)
-    return os.path.exists(cache_path(path, file, version_hash, key) + ".p")
+def is_in_cache(path, record, hash):
+    if os.file.exists(cache_path(path, file, hash, key)):
+        return True
+    else:
+        return False

corrlib/cli.py

@@ -6,7 +6,7 @@ from .toml import import_tomls, update_project, reimport_project
 from .find import find_record, list_projects
 from .tools import str2list
 from .main import update_aliases
-from .cache_io import drop_cache_files as cio_drop_cache_files
+from .meas_io import drop_cache as mio_drop_cache
 import os

@@ -171,7 +171,7 @@ def drop_cache(
     """
     Drop the currect cache directory of the dataset.
     """
-    cio_drop_cache_files(path)
+    mio_drop_cache(path)
     return

corrlib/initialization.py

@@ -21,8 +21,7 @@ def _create_db(db):
                  parameters TEXT,
                  parameter_file TEXT,
                  created_at TEXT,
-                 updated_at TEXT,
-                 current_version TEXT)''')
+                 updated_at TEXT)''')
     c.execute('''CREATE TABLE IF NOT EXISTS projects
                  (id TEXT PRIMARY KEY,
                   aliases TEXT,

corrlib/meas_io.py

@@ -5,10 +5,11 @@ import sqlite3
 from .input import sfcf,openQCD
 import json
 from typing import Union, Optional
-from pyerrors import Obs, Corr, load_object, dump_object
-from hashlib import sha256
-from .tools import cached, record2name_key, make_version_hash
-from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash
+from pyerrors import Obs, Corr, dump_object, load_object
+from hashlib import sha256, sha1
+from .tools import cached, record2name_key
+import shutil
+from .caching import cache_path, cache_dir


 def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None):

@@ -78,13 +79,11 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O
            subkey = "/".join(par_list)
            subkeys = [subkey]
            pars[subkey] = json.dumps(parameters)
-        meas_paths = []
         for subkey in subkeys:
-            par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
-            meas_path = file_in_archive + "::" + par_hash
-            meas_paths.append(meas_path)
-            known_meas[par_hash] = measurement[corr][subkey]
+            parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
+            meas_path = file_in_archive + "::" + parHash
+            known_meas[parHash] = measurement[corr][subkey]
             if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
                 c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))

@@ -93,12 +92,7 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O
                      (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
        conn.commit()
    pj.dump_dict_to_json(known_meas, file)
-    for meas_path in meas_paths:
-        version_hash = make_version_hash(path, meas_path)
-        print(version_hash)
-        c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? and name = ?", (version_hash, uuid, code, corr))
-        conn.commit()
-    files.append(db)
+    files.append(path + '/backlogger.db')
    conn.close()
    dl.save(files, message="Add measurements to database", dataset=path)

@@ -146,21 +140,16 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni
    returned_data: list = []
    for file in needed_data.keys():
        for key in list(needed_data[file]):
-            record = file + "::" + key
-            current_version = get_version_hash(path, record)
-            if is_in_cache(path, record):
-                returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p"))
+            if os.path.exists(cache_path(path, file, key) + ".p"):
+                returned_data.append(load_object(cache_path(path, file, key) + ".p"))
            else:
                if file not in preloaded:
                    preloaded[file] = preload(path, file)
                returned_data.append(preloaded[file][key])
                if cached:
-                    if not is_in_cache(path, record):
-                        file, key = record2name_key(record)
-                        if not os.path.exists(cache_dir(path, file)):
-                            os.makedirs(cache_dir(path, file))
-                        current_version = get_version_hash(path, record)
-                        dump_object(preloaded[file][key], cache_path(path, file, current_version, key))
+                    if not os.path.exists(cache_dir(path, file)):
+                        os.makedirs(cache_dir(path, file))
+                    dump_object(preloaded[file][key], cache_path(path, file, key))
    return returned_data

corrlib/tools.py

@@ -1,5 +1,5 @@
-import hashlib
-import os


 def str2list(string):
     return string.split(",")

@@ -22,10 +22,3 @@ def record2name_key(record_path: str):
     file = record_path.split("::")[0]
     key = record_path.split("::")[1]
     return file, key
-
-
-def make_version_hash(path, record):
-    file, key = record2name_key(record)
-    with open(os.path.join(path, file), 'rb') as fp:
-        file_hash = hashlib.file_digest(fp, 'sha1').hexdigest()
-    return file_hash

setup.py

@@ -10,7 +10,7 @@ setup(name='pycorrlib',
       version=version['__version__'],
       author='Justus Kuhlmann',
       author_email='j_kuhl19@uni-muenster.de',
-      install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'],
+      install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5'],
       entry_points = {
           'console_scripts': ['pcl=corrlib.cli:app'],
       },