can now read and write simple sfcf measurements
Parent: 5ec34ad98b
Commit: c3aa7577fb
7 changed files with 408 additions and 0 deletions
20 backlogger/__init__.py Normal file
@@ -0,0 +1,20 @@
"""
|
||||||
|
The aim of this project is to extend pyerrors to be able to collect measurements from different projects and make them easily accessable to
|
||||||
|
the research group. The idea is to build a database, in which the researcher can easily search for measurements on a correlator basis,
|
||||||
|
which may be reusable.
|
||||||
|
As a standard to store the measurements, we will use the .json.gz format from pyerrors.
|
||||||
|
This allows us to organize a library by files and exported dictionaries.
|
||||||
|
Also, this is compressable, but is also human readable in uncompressed form.
|
||||||
|
The project creates a database with a table to store the measurements, and a folder to store the .json.gz files and tracks the changes to the folder automatically using datalad.
|
||||||
|
This way, we can harness the power of datalad as a backand, to reproducibly build our database.
|
||||||
|
Projects, that are also datalad datasets can be linked to the backlog of correlators as subdatasets, such that, using datalad rerun function,
|
||||||
|
it can be easily seen wehere the respective measurement came from and ho it may be reproduced.
|
||||||
|
|
||||||
|
For now, we are interested in collecting primary IObservables only, as these are the most computationally expensive and time consuming to calculate.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
from .main import *
|
||||||
|
from .input import *
|
||||||
|
from .initialization import *
|
||||||
|
from .io import *
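To make the workflow described in the module docstring concrete, here is a minimal usage sketch (not part of the committed files); all paths, URLs, the ensemble name "A654" and the run prefix are placeholders, and passing str(param) as the parameters string is an assumption:

import backlogger as bl

bl.create("/data/backlog")                                    # datalad dataset, database and folder layout
uuid = bl.import_project("https://git.example.com/my_project.git", "/data/backlog")
param = bl.input.sfcf.read_param("/data/backlog", uuid, "infiles/run.in")
data, specs = bl.input.sfcf.read_data("/data/backlog", uuid, "out", "my_run", param)
bl.write_measurement("/data/backlog", "A654", data, uuid, "sfcf", str(param), "infiles/run.in")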
31 backlogger/git_tools.py Normal file
@@ -0,0 +1,31 @@
import os
import datalad.api as dl


def move_submodule(repo_path, old_path, new_path):
    """
    Move a submodule to a new location.

    Parameters
    ----------
    repo_path: str
        The path to the repository containing the submodule.
    old_path: str
        The old path of the module.
    new_path: str
        The new path of the module.
    """

    os.rename(repo_path + "/" + old_path, repo_path + "/" + new_path)
    with open(repo_path + '/.gitmodules', 'r') as fp:
        lines = fp.readlines()

    # rewrite both the section header and the path entry of the moved submodule
    for i, line in enumerate(lines):
        if line.startswith('\tpath') and old_path in line:
            lines[i] = line.replace(old_path, new_path)
        if line.startswith('[submodule "' + old_path + '"]'):
            lines[i] = line.replace(old_path, new_path)
    with open(repo_path + '/.gitmodules', 'w') as fp:
        fp.writelines(lines)
    dl.save(repo_path, message="Move module from " + old_path + " to " + new_path, dataset=repo_path)
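For reference, a hypothetical call mirroring how import_project() in main.py uses this helper (the dataset path and uuid are placeholders):

move_submodule("/data/backlog", "projects/tmp", "projects/1a2b3c4d-uuid")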
46 backlogger/initialization.py Normal file
@@ -0,0 +1,46 @@
import sqlite3
import datalad.api as dl
import os


def _create_db(path):
    """
    Create the database file and the backlogs and projects tables.

    """
    conn = sqlite3.connect(path)
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS backlogs
                 (id INTEGER PRIMARY KEY,
                  name TEXT,
                  ensemble TEXT,
                  code TEXT,
                  path TEXT,
                  project TEXT,
                  parameters TEXT,
                  parameter_file TEXT,
                  created_at TEXT,
                  updated_at TEXT)''')
    c.execute('''CREATE TABLE IF NOT EXISTS projects
                 (id TEXT PRIMARY KEY,
                  aliases TEXT,
                  code TEXT,
                  created_at TEXT,
                  updated_at TEXT)''')
    conn.commit()
    conn.close()


def create(path):
    """
    Create the folder of backlogs.

    """
    dl.create(path)
    _create_db(path + '/backlogger.db')
    os.chmod(path + '/backlogger.db', 0o666)
    os.makedirs(path + '/projects')
    os.makedirs(path + '/projects/tmp')
    os.makedirs(path + '/archive')
    # directory for per-project import scripts (import_project() adds one subfolder per uuid)
    os.makedirs(path + '/import_scripts')
    dl.save(path, dataset=path, message="Initialize backlogger directory.")
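As an illustration, a small sketch of setting up a backlog and listing the tables created by _create_db() (the target path is a placeholder):

import sqlite3
from backlogger.initialization import create

create("/data/backlog")
conn = sqlite3.connect("/data/backlog/backlogger.db")
print(conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall())   # [('backlogs',), ('projects',)]
conn.close()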
5 backlogger/input/__init__.py Normal file
@@ -0,0 +1,5 @@
"""
|
||||||
|
Import functions for different codes.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from . import sfcf
195 backlogger/input/sfcf.py Normal file
@@ -0,0 +1,195 @@
import pyerrors as pe
import datalad.api as dl


def read_param(path, project, file_in_project):
    """
    Read the parameters from the sfcf input file.

    Parameters
    ----------
    path : str
        The path to the backlogger folder.
    project : str
        The uuid of the project.
    file_in_project : str
        The path of the sfcf input file relative to the project.

    Returns
    -------
    dict
        The parameters in the sfcf file.

    """

    file = path + "/projects/" + project + '/' + file_in_project
    dl.get(file, dataset=path)
    with open(file, 'r') as f:
        lines = f.readlines()

    params = {}
    params['wf_offsets'] = []
    params['wf_basis'] = []
    params['wf_coeff'] = []
    params['qr'] = {}
    params['mrr'] = []
    params['crr'] = []
    params['qs'] = {}
    params['mrs'] = []
    params['crs'] = []

    for line in lines:
        if line.startswith('#'):
            continue
        if line.startswith('\n'):
            continue
        if line.startswith('wf_offsets'):
            num_wf_offsets = line.split()[1]
            for i in range(int(num_wf_offsets)):
                params['wf_offsets'].append([float(x) for x in lines[lines.index(line) + i + 1].split("#")[0].split()])

        if line.startswith('wf_basis'):
            num_wf_basis = line.split()[1]
            for i in range(int(num_wf_basis)):
                params['wf_basis'].append([float(x) for x in lines[lines.index(line) + i + 1].split("#")[0].split()])

        if line.startswith('wf_coeff'):
            num_wf_coeff = line.split()[1]
            for i in range(int(num_wf_coeff)):
                params['wf_coeff'].append([float(x) for x in lines[lines.index(line) + i + 1].split("#")[0].split()])

        if line.startswith('qr'):
            num_qr = line.split()[1]
            for i in range(int(num_qr)):
                dat = lines[lines.index(line) + i + 1].split("#")[0].strip().split()[:-1]
                params['qr'][dat[0]] = {}
                params['qr'][dat[0]]['mass'] = float(dat[1])
                params['qr'][dat[0]]['thetas'] = [float(x) for x in dat[2:5]]

        if line.startswith('mrr'):
            num_mrr = line.split()[1]
            for i in range(int(num_mrr)):
                params['mrr'].append(lines[lines.index(line) + i + 1].split("#")[0].strip())

        if line.startswith('crr'):
            num_crr = line.split()[1]
            for i in range(int(num_crr)):
                params['crr'].append(lines[lines.index(line) + i + 1].split("#")[0].strip())

        if line.startswith('qs'):
            num_qs = line.split()[1]
            for i in range(int(num_qs)):
                dat = lines[lines.index(line) + i + 1].split("#")[0].strip().split()[:-1]
                params['qs'][dat[0]] = {}
                params['qs'][dat[0]]['mass'] = float(dat[1])
                params['qs'][dat[0]]['thetas'] = [float(x) for x in dat[2:5]]

        if line.startswith('mrs'):
            num_mrs = line.split()[1]
            for i in range(int(num_mrs)):
                params['mrs'].append(lines[lines.index(line) + i + 1].split("#")[0].strip())

        if line.startswith('crs'):
            num_crs = line.split()[1]
            for i in range(int(num_crs)):
                params['crs'].append(lines[lines.index(line) + i + 1].split("#")[0].strip())

    # catch standard cases
    if params['wf_offsets'] == []:
        params['wf_offsets'] = [[0, 0, 0]]

    if params['wf_coeff'] == []:
        params['wf_coeff'] = [[0, -1]]

    return params
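
# Illustration (not part of this module): the parser above expects a block layout in which
# a keyword line carrying a count is followed by that many data lines; for qr/qs lines the
# last column is dropped. A made-up snippet that read_param() would accept could look like
#
#   wf_offsets 1
#   0.0 0.0 0.0
#   wf_basis 2
#   0.0 0.0 0.0
#   0.5 0.5 0.5
#   wf_coeff 1
#   0.0 -1.0
#   qr 2
#   q1 0.13 0.0 0.0 0.0 0
#   q2 0.13 0.5 0.5 0.5 0
#   mrr 1
#   q1 q2
#   crr 2
#   f_A
#   f_P
#
# for which read_param() returns the float lists plus
# qr = {'q1': {'mass': 0.13, 'thetas': [0.0, 0.0, 0.0]}, ...}, mrr = ['q1 q2'] and crr = ['f_A', 'f_P'].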


def _map_params(params, spec_list):
    """
    Map the extracted parameters to the extracted data.

    """

    new_specs = {}
    # quarks
    quarks = spec_list[0].split(" ")

    new_specs['quarks'] = (params['qr'][quarks[0]], params['qr'][quarks[1]])

    # offset
    new_specs['offset'] = (params['wf_offsets'][int(spec_list[1])])
    # wf1
    contribs = []
    for i, coeff in enumerate(params['wf_coeff'][int(spec_list[2])]):
        if not coeff == 0:
            contrib = (coeff, params['wf_basis'][i])
            contribs.append(contrib)
    new_specs['wf1'] = contribs

    if len(spec_list) == 4:
        # wf2
        contribs = []
        for i, coeff in enumerate(params['wf_coeff'][int(spec_list[3])]):
            if not coeff == 0:
                contrib = (coeff, params['wf_basis'][i])
                contribs.append(contrib)
        new_specs['wf2'] = contribs
    return new_specs
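
# Illustration (not part of this module): for a key such as 'f_A/q1 q2/0/0/0' as produced by
# pyerrors and split in read_data() below, a 'bi' correlator yields spec_list = ['q1 q2', '0', '0'],
# and with the example parameters sketched above _map_params() returns roughly
#   {'quarks': (params['qr']['q1'], params['qr']['q2']),
#    'offset': params['wf_offsets'][0],
#    'wf1': [(-1.0, params['wf_basis'][1])]}   # only non-zero wf_coeff entries contribute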


def read_data(path, project, dir_in_project, prefix, param, version='1.0c', cfg_seperator='n', sep='/', **kwargs):
    """
    Extract the data from the sfcf output files.

    Parameters
    ----------
    path : str
        The path to the backlogger folder.
    project : str
        The uuid of the project.
    dir_in_project : str
        The path of the measurement directory relative to the project.
    prefix : str
        The prefix of the sfcf output files.
    param : dict
        The parameters of the measurement, as returned by read_param.

    Returns
    -------
    dict
        The data from the sfcf file.

    """
    names = kwargs.get('names', None)
    corr_types = {
        'f_A': 'bi',
        'f_P': 'bi',
        'g_A': 'bi',
        'g_P': 'bi',
        'f_1': 'bb',
        'k_1': 'bb',
    }
    directory = path + "/projects/" + project + '/' + dir_in_project
    dl.get(directory, dataset=path)
    corr_type_list = []
    for corr_name in param['crr']:
        if corr_name not in corr_types:
            raise ValueError('Correlator type not known.')
        corr_type_list.append(corr_types[corr_name])

    if not param['crr'] == []:
        if names is not None:
            data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])),
                                                     range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True, names=names)
        else:
            data_crr = pe.input.sfcf.read_sfcf_multi(directory, prefix, param['crr'], param['mrr'], corr_type_list, range(len(param['wf_offsets'])),
                                                     range(len(param['wf_basis'])), range(len(param['wf_basis'])), version, cfg_seperator, keyed_out=True)

    if not param['crs'] == []:
        data_crs = pe.input.sfcf.read_sfcf_multi(directory, param['crs'])

    data = {}
    if not param['crr'] == []:
        for key in data_crr.keys():
            data[key] = data_crr[key]

    if not param['crs'] == []:
        for key in data_crs.keys():
            data[key] = data_crs[key]

    # sort data by correlator
    sorted_data = {}
    for key in data.keys():
        key_parts = key.split(sep)
        corr = key_parts[0]
        if corr_types[corr] == 'bi':
            specs = _map_params(param, key_parts[1:-1])
        else:
            specs = _map_params(param, key_parts[1:])
        if corr not in sorted_data:
            sorted_data[corr] = {}
        sorted_data[corr][sep.join(key_parts[1:])] = data[key]
    return sorted_data, specs
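A hedged sketch of how read_param() and read_data() could be combined for one project (the uuid, directory layout, prefix and replica names are placeholders):

from backlogger.input import sfcf

uuid = "1a2b3c4d-uuid"   # as returned by import_project()
param = sfcf.read_param("/data/backlog", uuid, "infiles/run.in")
data, specs = sfcf.read_data("/data/backlog", uuid, "out", "my_run", param, names=['my_run_r0', 'my_run_r1'])
# data is keyed by correlator, e.g. data['f_A'] maps keys like 'q1 q2/0/0/0' to the objects
# returned by pyerrors' read_sfcf_multi; specs describes quark masses, thetas and wavefunctions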
46 backlogger/io.py Normal file
@@ -0,0 +1,46 @@
from pyerrors.input import json as pj
import os
import datalad.api as dl
import sqlite3


def write_measurement(path, ensemble, measurement, uuid, code, parameters, parameter_file):
    """
    Write a measurement to the backlog.
    If the file for the measurement already exists, update the measurement.

    Parameters
    ----------
    path: str
        The path to the backlogger folder.
    ensemble: str
        The ensemble of the measurement.
    measurement: dict
        Measurements to be captured in the backlogging system.
    uuid: str
        The uuid of the project.
    code: str
        The code that was used to create the measurement.
    parameters: str
        The parameters of the measurement.
    parameter_file: str
        The parameter file of the measurement.
    """

    files = [path + '/backlogger.db']
    for corr in measurement.keys():
        file = path + "/archive/" + ensemble + "/" + corr + '/' + uuid + '.json.gz'
        files.append(file)
        if not os.path.exists(path + "/archive/" + ensemble + "/" + corr):
            os.makedirs(path + "/archive/" + ensemble + "/" + corr)
        conn = sqlite3.connect(path + '/backlogger.db')
        c = conn.cursor()
        # remember whether the file existed before it is (re)written below
        file_existed = os.path.exists(file)
        if file_existed:
            dl.unlock(file, dataset=path)
            known_meas = pj.load_json_dict(file)
            for key in measurement[corr].keys():
                known_meas[key] = measurement[corr][key]
        else:
            known_meas = measurement[corr]
        pj.dump_dict_to_json(known_meas, file)
        for subkey in measurement[corr].keys():
            meas_path = file + "::" + subkey
            if not file_existed:
                c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))", (corr, ensemble, code, meas_path, uuid, parameters, parameter_file))
            else:
                c.execute("UPDATE backlogs SET updated_at=datetime('now') WHERE path=?", (meas_path,))
        conn.commit()
        conn.close()
    dl.save(files, message="Add measurement to database", dataset=path)
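A hedged sketch of feeding the output of read_data() into write_measurement() (ensemble name, code label and paths are placeholders, and serializing the parameters with str() is an assumption):

from backlogger.io import write_measurement

# data, param and uuid as in the sfcf example above
write_measurement("/data/backlog", "A654", data, uuid,
                  code="sfcf", parameters=str(param), parameter_file="infiles/run.in")
# writes or updates /data/backlog/archive/A654/<corr>/<uuid>.json.gz per correlator and adds
# one backlogs row per sub-key, with path set to '<file>::<subkey>'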
65 backlogger/main.py Normal file
@@ -0,0 +1,65 @@
import sqlite3
import datalad.api as dl
import os
from .git_tools import move_submodule


def create_project(path, uuid, aliases=None, code=None):
    """
    Create a new project entry in the database.

    Parameters
    ----------
    path: str
        The path to the backlogger folder.
    uuid: str
        The uuid of the project.
    aliases: str (optional)
        Custom name for the project (e.g. 'cA determination on exponential clover').
    code: str (optional)
        The code that was used to create the measurements.
    """
    conn = sqlite3.connect(path + "/backlogger.db")
    c = conn.cursor()
    known_projects = c.execute("SELECT * FROM projects WHERE id=?", (uuid,))
    if known_projects.fetchone():
        raise ValueError("Project already imported, use update_project() instead.")

    dl.unlock(path + "/backlogger.db", dataset=path)
    c.execute("INSERT INTO projects (id, aliases, code, created_at, updated_at) VALUES (?, ?, ?, datetime('now'), datetime('now'))", (uuid, aliases, code))
    conn.commit()
    conn.close()
    dl.save(path + "/backlogger.db", message="Added entry for project " + uuid + " to database", dataset=path)


def import_project(url, path, aliases=None, code=None):
    """
    Import a datalad project as a subdataset and register it in the database.

    Parameters
    ----------
    url: str
        The url of the project to import. This can be any url that datalad can handle.
    path: str
        The path to the backlogger folder.
    aliases: str
        Custom name of the project, alias of the project.
    code: str
        Code that was used to create the measurements.
    """

    # install in tmp to find uuid
    tmp_path = path + '/projects/tmp'
    dl.install(path=tmp_path, source=url, dataset=path)
    with open(tmp_path + "/.datalad/config") as fp:
        for line in fp:
            if line.startswith("\tid"):
                uuid = line.split()[2]
                break

    create_project(path, uuid, aliases, code)
    move_submodule(path, 'projects/tmp', 'projects/' + uuid)
    os.mkdir(path + '/projects/tmp')

    os.mkdir(path + '/import_scripts/' + uuid)
    dl.save(path, message="Import project from " + url, dataset=path)
    return uuid
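Finally, a hedged sketch of registering an external project (the URL and alias are placeholders):

import backlogger as bl

uuid = bl.import_project("https://git.example.com/my_measurement_project.git", "/data/backlog",
                         aliases="cA determination", code="sfcf")
# the project is now installed under /data/backlog/projects/<uuid> and listed in the projects table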