add functionality to automatically register when cache has an old version of an archived file
This commit is contained in:
parent
df71ee5ad9
commit
c9fe09d9d6
5 changed files with 66 additions and 28 deletions
|
|
@ -19,5 +19,6 @@ from .main import *
|
||||||
from .import input as input
|
from .import input as input
|
||||||
from .initialization import *
|
from .initialization import *
|
||||||
from .meas_io import *
|
from .meas_io import *
|
||||||
|
from .cache_io import *
|
||||||
from .find import *
|
from .find import *
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,19 @@
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
from .tools import record2name_key
|
||||||
|
from pyerrors import dump_object
|
||||||
|
import datalad.api as dl
|
||||||
|
import sqlite3
|
||||||
|
|
||||||
|
|
||||||
|
def get_version_hash(path, record):
    """Look up the current version hash of *record* in the backlogger database.

    Parameters
    ----------
    path : str
        Root of the datalad dataset containing ``backlogger.db``.
    record : str
        Record path (``file::key``) used as the ``path`` column of the
        ``backlogs`` table.

    Returns
    -------
    str
        The ``current_version`` value stored for the record.

    Raises
    ------
    IndexError
        If no row exists for *record*.
    """
    db = os.path.join(path, "backlogger.db")
    # Make sure the annexed database file is actually present locally.
    dl.get(db, dataset=path)
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        # Parameterized query: the previous f-string interpolation was
        # vulnerable to SQL injection and broke on records containing quotes.
        c.execute("SELECT current_version FROM backlogs WHERE path = ?", (record,))
        return c.fetchall()[0][0]
    finally:
        # Always release the connection, even when the lookup fails.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
def drop_cache_files(path: str, fs: Optional[list[str]]=None):
|
def drop_cache_files(path: str, fs: Optional[list[str]]=None):
|
||||||
|
|
@ -19,15 +32,27 @@ def cache_dir(path, file):
|
||||||
return cache_path
|
return cache_path
|
||||||
|
|
||||||
|
|
||||||
def cache_path(path, file, sha_hash, key):
    """Return the full cache file path for a record.

    The cached object lives inside the measurement's cache directory under
    the name ``<key>_<sha_hash>``, so stale entries can be detected by
    comparing the hash suffix against the database's current version.
    """
    target_dir = cache_dir(path, file)
    return os.path.join(target_dir, key + "_" + sha_hash)
|
||||||
|
|
||||||
def is_old_version(path, record):
    """Return True if the cache holds an outdated copy of *record*.

    Walks the record's cache directory and compares the hash suffix of any
    cached file for this key against the ``current_version`` stored in the
    backlogger database.

    Parameters
    ----------
    path : str
        Root of the dataset.
    record : str
        Record path (``file::key``).

    Returns
    -------
    bool
        True if a cached file exists for the key but carries a different
        version hash; False if the cached copy is current or if nothing is
        cached at all (the original fell through and returned ``None`` in
        that case).
    """
    version_hash = get_version_hash(path, record)
    file, key = record2name_key(record)
    meas_cache_path = cache_dir(path, file)
    cached_files = []
    for _, _, fs in os.walk(meas_cache_path):
        cached_files.extend(fs)
    for filename in cached_files:
        # Cache files are named "<key>_<hash>.p".  Split from the right so
        # keys that themselves contain underscores are handled correctly
        # (the sha hex digest never contains "_").
        name, _, hash_part = filename.rpartition("_")
        if name == key:
            # Strip the trailing ".p" extension before comparing hashes.
            return hash_part[:-2] != version_hash
    # Nothing cached for this record at all -> not an *old* version.
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_in_cache(path, record):
    """Check whether the current version of *record* is present in the cache.

    Looks up the record's current version hash in the database, then tests
    for the corresponding pickled cache file on disk.
    """
    current_hash = get_version_hash(path, record)
    file, key = record2name_key(record)
    target = cache_path(path, file, current_hash, key) + ".p"
    return os.path.exists(target)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from .toml import import_tomls, update_project, reimport_project
|
||||||
from .find import find_record, list_projects
|
from .find import find_record, list_projects
|
||||||
from .tools import str2list
|
from .tools import str2list
|
||||||
from .main import update_aliases
|
from .main import update_aliases
|
||||||
from .meas_io import drop_cache as mio_drop_cache
|
from .cache_io import drop_cache_files as cio_drop_cache_files
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -171,7 +171,7 @@ def drop_cache(
|
||||||
"""
|
"""
|
||||||
Drop the current cache directory of the dataset.
|
Drop the current cache directory of the dataset.
|
||||||
"""
|
"""
|
||||||
mio_drop_cache(path)
|
cio_drop_cache_files(path)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,8 @@ def _create_db(db):
|
||||||
parameters TEXT,
|
parameters TEXT,
|
||||||
parameter_file TEXT,
|
parameter_file TEXT,
|
||||||
created_at TEXT,
|
created_at TEXT,
|
||||||
updated_at TEXT)''')
|
updated_at TEXT,
|
||||||
|
current_version TEXT)''')
|
||||||
c.execute('''CREATE TABLE IF NOT EXISTS projects
|
c.execute('''CREATE TABLE IF NOT EXISTS projects
|
||||||
(id TEXT PRIMARY KEY,
|
(id TEXT PRIMARY KEY,
|
||||||
aliases TEXT,
|
aliases TEXT,
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,10 @@ import sqlite3
|
||||||
from .input import sfcf,openQCD
|
from .input import sfcf,openQCD
|
||||||
import json
|
import json
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
from pyerrors import Obs, Corr, dump_object, load_object
|
from pyerrors import Obs, Corr, load_object, dump_object
|
||||||
from hashlib import sha256, sha1
|
from hashlib import sha256
|
||||||
from .tools import cached, record2name_key
|
from .tools import cached, record2name_key, make_version_hash
|
||||||
import shutil
|
from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash
|
||||||
from .caching import cache_path, cache_dir
|
|
||||||
|
|
||||||
|
|
||||||
def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None):
|
def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: Optional[str]=None):
|
||||||
|
|
@ -79,11 +78,13 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O
|
||||||
subkey = "/".join(par_list)
|
subkey = "/".join(par_list)
|
||||||
subkeys = [subkey]
|
subkeys = [subkey]
|
||||||
pars[subkey] = json.dumps(parameters)
|
pars[subkey] = json.dumps(parameters)
|
||||||
for subkey in subkeys:
|
|
||||||
parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
|
|
||||||
meas_path = file_in_archive + "::" + parHash
|
|
||||||
|
|
||||||
known_meas[parHash] = measurement[corr][subkey]
|
meas_paths = []
|
||||||
|
for subkey in subkeys:
|
||||||
|
par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
|
||||||
|
meas_path = file_in_archive + "::" + par_hash
|
||||||
|
meas_paths.append(meas_path)
|
||||||
|
known_meas[par_hash] = measurement[corr][subkey]
|
||||||
|
|
||||||
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
|
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
|
||||||
c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
|
c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
|
||||||
|
|
@ -92,7 +93,12 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file: O
|
||||||
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
|
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
pj.dump_dict_to_json(known_meas, file)
|
pj.dump_dict_to_json(known_meas, file)
|
||||||
files.append(path + '/backlogger.db')
|
for meas_path in meas_paths:
|
||||||
|
version_hash = make_version_hash(path, meas_path)
|
||||||
|
print(version_hash)
|
||||||
|
c.execute("UPDATE backlogs SET current_version = ? WHERE project = ? AND code = ? and name = ?", (version_hash, uuid, code, corr))
|
||||||
|
conn.commit()
|
||||||
|
files.append(db)
|
||||||
conn.close()
|
conn.close()
|
||||||
dl.save(files, message="Add measurements to database", dataset=path)
|
dl.save(files, message="Add measurements to database", dataset=path)
|
||||||
|
|
||||||
|
|
@ -140,16 +146,21 @@ def load_records(path: str, record_paths: list[str], preloaded = {}) -> list[Uni
|
||||||
returned_data: list = []
|
returned_data: list = []
|
||||||
for file in needed_data.keys():
|
for file in needed_data.keys():
|
||||||
for key in list(needed_data[file]):
|
for key in list(needed_data[file]):
|
||||||
if os.path.exists(cache_path(path, file, key) + ".p"):
|
record = file + "::" + key
|
||||||
returned_data.append(load_object(cache_path(path, file, key) + ".p"))
|
current_version = get_version_hash(path, record)
|
||||||
|
if is_in_cache(path, record):
|
||||||
|
returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p"))
|
||||||
else:
|
else:
|
||||||
if file not in preloaded:
|
if file not in preloaded:
|
||||||
preloaded[file] = preload(path, file)
|
preloaded[file] = preload(path, file)
|
||||||
returned_data.append(preloaded[file][key])
|
returned_data.append(preloaded[file][key])
|
||||||
if cached:
|
if cached:
|
||||||
if not os.path.exists(cache_dir(path, file)):
|
if not is_in_cache(path, record):
|
||||||
os.makedirs(cache_dir(path, file))
|
file, key = record2name_key(record)
|
||||||
dump_object(preloaded[file][key], cache_path(path, file, key))
|
if not os.path.exists(cache_dir(path, file)):
|
||||||
|
os.makedirs(cache_dir(path, file))
|
||||||
|
current_version = get_version_hash(path, record)
|
||||||
|
dump_object(preloaded[file][key], cache_path(path, file, current_version, key))
|
||||||
return returned_data
|
return returned_data
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue