try sha hash to combat collisions

This commit is contained in:
Justus Kuhlmann 2025-04-08 09:39:57 +00:00
parent 2998fd2613
commit c4e1e1aa77

View file

@ -6,6 +6,7 @@ from .input import sfcf,openQCD
import json import json
from typing import Union from typing import Union
from pyerrors import Obs, Corr from pyerrors import Obs, Corr
from hashlib import sha256
def write_measurement(path, ensemble, measurement, uuid, code, parameter_file): def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
@ -45,9 +46,11 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
known_meas = pj.load_json_dict(file) known_meas = pj.load_json_dict(file)
if code == "sfcf": if code == "sfcf":
for subkey in measurement[corr].keys(): for subkey in measurement[corr].keys():
meas_path = file + "::" + subkey pars = sfcf.get_specs(corr + "/" + subkey, parameters)
parHash = sha256(str(pars))
meas_path = file + "::" + parHash
if known_meas is not None: if known_meas is not None:
known_meas[subkey] = measurement[corr][subkey] known_meas[parHash] = measurement[corr][parHash]
# this should be only set if something changed. # this should be only set if something changed.
else: else:
known_meas = measurement[corr] known_meas = measurement[corr]
@ -55,7 +58,8 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None: if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, )) c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
else: else:
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, sfcf.get_specs(corr + "/" + subkey, parameters), parameter_file)) c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(corr, ensemble, code, meas_path, uuid, sfcf.get_specs(corr + "/" + subkey, parameters), parameter_file))
elif code == "openQCD": elif code == "openQCD":
for i in range(len(parameters["rw_fcts"])): for i in range(len(parameters["rw_fcts"])):
subkey = "/".join(parameters["rw_fcts"][i]) subkey = "/".join(parameters["rw_fcts"][i])
@ -104,14 +108,15 @@ def load_records(path: str, meas_paths: list[str]) -> list[Union[Corr, Obs]]:
""" """
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for mpath in meas_paths: for mpath in meas_paths:
file = path.split("::")[0] file = mpath.split("::")[0]
if file not in needed_data.keys(): if file not in needed_data.keys():
needed_data[file] = [] needed_data[file] = []
key = mpath.split("::")[1] key = mpath.split("::")[1]
needed_data[file].append(key) needed_data[file].append(key)
dl.get([os.path.join(path, file) for file in needed_data.keys()], dataset=path)
returned_data: list = [] returned_data: list = []
for filename in needed_data.keys(): for filename in needed_data.keys():
filedict = pj.load_json_dict(filename) filedict = pj.load_json_dict(os.path.join(path, filename))
for key in list(needed_data[filename]): for key in list(needed_data[filename]):
returned_data.append(filedict[key]) returned_data.append(filedict[key])
return returned_data return returned_data