better implementation of hashes

Justus Kuhlmann 2025-04-08 12:59:03 +00:00
parent c4e1e1aa77
commit 362fc8788b


@@ -34,10 +34,11 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
     conn = sqlite3.connect(path + '/backlogger.db')
     c = conn.cursor()
     files = []
+    hashed_measurement = {}
     for corr in measurement.keys():
         file = path + "/archive/" + ensemble + "/" + corr + '/' + uuid + '.json.gz'
         files.append(file)
-        known_meas = None
+        known_meas = {}
         if not os.path.exists(path + "/archive/" + ensemble + "/" + corr):
             os.makedirs(path + "/archive/" + ensemble + "/" + corr)
         else:
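
Changing known_meas from None to an empty dict (together with the new hashed_measurement dict) lets the writer merge new results into whatever was archived before, keyed by a hash of the parameter specs, instead of replacing the file contents wholesale. A minimal sketch of that pattern, using a placeholder spec string and dummy data rather than anything from the repository:

from hashlib import sha256

# Sketch only: start from the previously archived dict (or an empty one) and
# update just the entry whose key is the hash of the current parameter specs.
known_meas = {}                                   # stands in for pj.load_json_dict(file)
pars = "example parameter specs"                  # stands in for sfcf.get_specs(corr + "/" + subkey, parameters)
parHash = sha256(str(pars).encode('UTF-8')).hexdigest()
known_meas[parHash] = {"example": "sub-measurement"}  # entries under other hashes stay untouched
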
@@ -45,28 +46,29 @@ def write_measurement(path, ensemble, measurement, uuid, code, parameter_file):
                 dl.unlock(file, dataset=path)
                 known_meas = pj.load_json_dict(file)
         if code == "sfcf":
+            hashed_measurement[corr] = {}
             for subkey in measurement[corr].keys():
                 pars = sfcf.get_specs(corr + "/" + subkey, parameters)
-                parHash = sha256(str(pars))
+                parHash = sha256(str(pars).encode('UTF-8')).hexdigest()
                 meas_path = file + "::" + parHash
-                if known_meas is not None:
-                    known_meas[parHash] = measurement[corr][parHash]
-                    # this should be only set if something changed.
-                else:
-                    known_meas = measurement[corr]
+                known_meas[parHash] = measurement[corr][subkey]
                 if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
                     c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
                 else:
                     c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
-                              (corr, ensemble, code, meas_path, uuid, sfcf.get_specs(corr + "/" + subkey, parameters), parameter_file))
+                              (corr, ensemble, code, meas_path, uuid, pars, parameter_file))
         elif code == "openQCD":
             for i in range(len(parameters["rw_fcts"])):
-                subkey = "/".join(parameters["rw_fcts"][i])
-                meas_path = file + "::" + subkey
-                c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", ("ms1", ensemble, code, meas_path, uuid, json.dumps(parameters["rw_fcts"][i]), parameter_file))
+                pars = json.dumps(parameters["rw_fcts"][i])
+                parHash = sha256(str(pars).encode('UTF-8')).hexdigest()
+                meas_path = file + "::" + parHash
+                if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
+                    c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
+                else:
+                    c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", ("ms1", ensemble, code, meas_path, uuid, json.dumps(parameters["rw_fcts"][i]), parameter_file))
         conn.commit()
-        pj.dump_dict_to_json(measurement[corr], file)
+        pj.dump_dict_to_json(known_meas, file)
     files.append(path + '/backlogger.db')
     conn.close()
     dl.save(files, message="Add measurements to database", dataset=path)
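
Both the sfcf and the openQCD branch now record a measurement under file + "::" + hash of its parameters, so an entry can be located again by recomputing that hash. A rough lookup sketch under that assumption; the helper name is invented, and the backlogs table layout is taken from the INSERT statements above:

import sqlite3
from hashlib import sha256

def find_backlog_entry(db_path, archive_file, pars):
    # Hypothetical helper: rebuild the hash-suffixed path used in this commit
    # and fetch the matching row from the backlogs table, if any.
    meas_path = archive_file + "::" + sha256(str(pars).encode('UTF-8')).hexdigest()
    conn = sqlite3.connect(db_path)
    row = conn.cursor().execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone()
    conn.close()
    return row
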