Compare commits
21 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
5e1be236ee |
|||
|
f8b70f07c9 |
|||
|
6ea1827b99 |
|||
|
3d91509ab6 |
|||
|
14d19ce9dd |
|||
|
15bf399a89 |
|||
|
e07f2ef9b0 |
|||
|
7e38d71b90 |
|||
|
1d981022cb |
|||
|
5bd94633e8 |
|||
|
64579c477c |
|||
|
c9fe09d9d6 |
|||
|
df71ee5ad9 |
|||
|
f47a9caae7 |
|||
|
a080ca835f |
|||
|
4f3e78177e |
|||
|
91c7a9d95d |
|||
|
73d7687359 |
|||
|
085256857d |
|||
|
|
5e87f569e2 | ||
|
|
50ff178a0c |
19 changed files with 244 additions and 145 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -2,6 +2,7 @@ pyerrors_corrlib.egg-info
|
||||||
__pycache__
|
__pycache__
|
||||||
*.egg-info
|
*.egg-info
|
||||||
test.ipynb
|
test.ipynb
|
||||||
|
test_ds
|
||||||
.vscode
|
.vscode
|
||||||
.venv
|
.venv
|
||||||
.pytest_cache
|
.pytest_cache
|
||||||
|
|
|
||||||
|
|
@ -5,3 +5,12 @@ This is done in a reproducible way using `datalad`.
|
||||||
In principle, a dataset is created, that is automatically administered by the backlogger, in which data from different projects are held together.
|
In principle, a dataset is created, that is automatically administered by the backlogger, in which data from different projects are held together.
|
||||||
Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements.
|
Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements.
|
||||||
The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methods of pyerrors.
|
The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methods of pyerrors.
|
||||||
|
|
||||||
|
We work with the following nomenclature in this project:
|
||||||
|
- Measurement
|
||||||
|
A set of Observables, including the appropriate metadata.
|
||||||
|
- Project
|
||||||
|
A series of measurements that was done by one person as part of their research.
|
||||||
|
- Record
|
||||||
|
An entry of a single Correlator in the database of the backlogger.
|
||||||
|
-
|
||||||
25
TODO.md
25
TODO.md
|
|
@ -1,14 +1,21 @@
|
||||||
# TODO
|
# TODO
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
- implement import of non-datalad projects
|
- [ ] implement import of non-datalad projects
|
||||||
- implement a way to use another backlog repo as a project
|
- [ ] implement a way to use another backlog repo as a project
|
||||||
|
- [ ] make cache deadlock resistant (no read while writing)
|
||||||
- find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project
|
- [ ] find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project
|
||||||
- this could e.g. be done along the lines of mandatory documentation
|
- [ ] this could e.g. be done along the lines of mandatory documentation
|
||||||
- keep better track of the versions of the code, that was used for a specific measurement.
|
- [ ] keep better track of the versions of the code, that was used for a specific measurement.
|
||||||
- maybe let this be an input in the project file?
|
- [ ] maybe let this be an input in the project file?
|
||||||
- git repo and commit hash/version tag
|
- [ ] git repo and commit hash/version tag
|
||||||
|
- [ ] implement a code table?
|
||||||
|
- [ ] parallel processing of measurements
|
||||||
|
- [ ] extra SQL table for ensembles with UUID and aliases
|
||||||
## Bugfixes
|
## Bugfixes
|
||||||
- [ ] revisit the reimport function for single files
|
- [ ] revisit the reimport function for single files
|
||||||
|
- [ ] drop record needs to look if no records are left in a json file.
|
||||||
|
|
||||||
|
## Rough Ideas
|
||||||
|
- [ ] multitable could provide a high speed implementation of an HDF5 based format
|
||||||
|
- [ ] implement also a way to include compiled binaries in the archives.
|
||||||
|
|
|
||||||
|
|
@ -22,3 +22,4 @@ from .meas_io import load_records as load_records
|
||||||
from .find import find_project as find_project
|
from .find import find_project as find_project
|
||||||
from .find import find_record as find_record
|
from .find import find_record as find_record
|
||||||
from .find import list_projects as list_projects
|
from .find import list_projects as list_projects
|
||||||
|
from .tools import *
|
||||||
|
|
|
||||||
58
corrlib/cache_io.py
Normal file
58
corrlib/cache_io.py
Normal file
|
|
@ -0,0 +1,58 @@
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from .tools import record2name_key
|
||||||
|
import datalad.api as dl
|
||||||
|
import sqlite3
|
||||||
|
from tools import db_filename
|
||||||
|
|
||||||
|
|
||||||
|
def get_version_hash(path: str, record: str) -> str:
    """
    Look up the version hash that the database currently stores for a record.

    Parameters
    ----------
    path: str
        The path of the library.
    record: str
        The record path, matched against the ``path`` column of the ``backlogs`` table.

    Returns
    -------
    version_hash: str
        The content of the ``current_version`` column of the first matching row, converted to a string.

    Raises
    ------
    IndexError
        If no row of the ``backlogs`` table has the given record path.
    """
    # NOTE(review): ``db_filename`` is imported at module level via ``from tools import ...``,
    # while the neighbouring import uses ``from .tools import ...`` -- confirm that it resolves.
    db = os.path.join(path, db_filename(path))
    # Ask datalad to provide the database file before it is opened.
    dl.get(db, dataset=path)
    conn = sqlite3.connect(db)
    try:
        # The record is bound as a parameter, so quotes inside it cannot alter the statement.
        row = conn.execute("SELECT current_version FROM backlogs WHERE path = ?", (record,)).fetchone()
    finally:
        # Always release the connection, also when the query fails.
        conn.close()
    if row is None:
        raise IndexError("No record with path '" + record + "' found in the database.")
    return str(row[0])
|
||||||
|
|
||||||
|
|
||||||
|
def drop_cache_files(path: str, fs: Optional[list[str]]=None) -> None:
    """
    Remove entries from the ``.cache`` directory of the library.

    Parameters
    ----------
    path: str
        The path of the library.
    fs: list[str], optional
        Names of the entries, relative to the cache directory, that are removed.
        If None, every entry found in the cache directory is removed.
        The cache directory itself is kept in both cases.

    Raises
    ------
    FileNotFoundError
        If an explicitly named entry does not exist.
    """
    cache_root = os.path.join(path, ".cache")
    if fs is None:
        if not os.path.isdir(cache_root):
            # Nothing has been cached yet, so there is nothing to drop.
            return
        fs = os.listdir(cache_root)
    for entry in fs:
        target = os.path.join(cache_root, entry)
        # shutil.rmtree only handles real directories; plain files and links are unlinked.
        if os.path.isdir(target) and not os.path.islink(target):
            shutil.rmtree(target)
        else:
            os.remove(target)
|
||||||
|
|
||||||
|
|
||||||
|
def cache_dir(path: str, file: str) -> str:
    """
    Build the cache directory that corresponds to a file of the library.

    Parameters
    ----------
    path: str
        The path of the library.
    file: str
        The "/"-separated path of the file inside the library.

    Returns
    -------
    cache_path: str
        ``path``, ".cache" and every component of ``file`` except the first one, joined by "/".
    """
    # The leading component of ``file`` is dropped, the remainder is mirrored below ".cache".
    sub_dirs = file.split("/")[1:]
    return "/".join([path, ".cache", *sub_dirs])
|
||||||
|
|
||||||
|
|
||||||
|
def cache_path(path: str, file: str, sha_hash: str, key: str) -> str:
    """
    Build the path (without file extension) under which one version of a record is cached.

    Parameters
    ----------
    path: str
        The path of the library.
    file: str
        The file in the library that we want to access the cached data of.
    sha_hash: str
        The version hash that is appended to the key.
    key: str
        The key within the archive file.

    Returns
    -------
    cache_path: str
        The cache directory of ``file`` joined with "<key>_<sha_hash>".
    """
    directory = cache_dir(path, file)
    entry_name = "_".join((key, sha_hash))
    return os.path.join(directory, entry_name)
|
||||||
|
|
||||||
|
|
||||||
|
def is_old_version(path: str, record: str) -> bool:
    """
    Check whether the cache lacks an entry of the current version for a record.

    Parameters
    ----------
    path: str
        The path of the library.
    record: str
        The record path.

    Returns
    -------
    is_old: bool
        False if a file below the cache directory of the record's file is named after the
        record's key and the version hash stored in the database, True otherwise
        (also when nothing is cached at all).
    """
    version_hash = get_version_hash(path, record)
    file, key = record2name_key(record)
    # Gather the names of all files below the cache directory, whatever their sub-directory.
    cached_names = [name for _, _, names in os.walk(cache_dir(path, file)) for name in names]
    # File names look like "<key>_<hash>" plus a two-character suffix that is cut off
    # (presumably the ".p" extension -- confirm against the code writing the cache).
    # The list is built completely, so every file named after the key is inspected.
    up_to_date = [name.split("_")[1][:-2] == version_hash
                  for name in cached_names
                  if name.split("_")[0] == key]
    return not any(up_to_date)
|
||||||
|
|
||||||
|
|
||||||
|
def is_in_cache(path: str, record: str) -> bool:
    """
    Check whether the current version of a record has a file in the cache.

    Parameters
    ----------
    path: str
        The path of the library.
    record: str
        The record path.

    Returns
    -------
    in_cache: bool
        True if the cache path built from the record's file, its key and the version hash
        stored in the database exists with the extension ".p".
    """
    current_hash = get_version_hash(path, record)
    file, key = record2name_key(record)
    cached_file = cache_path(path, file, current_hash, key) + ".p"
    return os.path.exists(cached_file)
|
||||||
|
|
@ -6,8 +6,7 @@ from .toml import import_tomls, update_project, reimport_project
|
||||||
from .find import find_record, list_projects
|
from .find import find_record, list_projects
|
||||||
from .tools import str2list
|
from .tools import str2list
|
||||||
from .main import update_aliases
|
from .main import update_aliases
|
||||||
from .meas_io import drop_cache as mio_drop_cache
|
from .cache_io import drop_cache_files as cio_drop_cache_files
|
||||||
from .meas_io import load_record as mio_load_record
|
|
||||||
import os
|
import os
|
||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
|
|
||||||
|
|
@ -36,7 +35,6 @@ def update(
|
||||||
update_project(path, uuid)
|
update_project(path, uuid)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def list(
|
def list(
|
||||||
path: str = typer.Option(
|
path: str = typer.Option(
|
||||||
|
|
@ -96,39 +94,12 @@ def find(
|
||||||
ensemble: str = typer.Argument(),
|
ensemble: str = typer.Argument(),
|
||||||
corr: str = typer.Argument(),
|
corr: str = typer.Argument(),
|
||||||
code: str = typer.Argument(),
|
code: str = typer.Argument(),
|
||||||
arg: str = typer.Option(
|
|
||||||
str('all'),
|
|
||||||
"--argument",
|
|
||||||
"-a",
|
|
||||||
),
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator.
|
Find a record in the backlog at hand. Through specifying it's ensemble and the measured correlator.
|
||||||
"""
|
"""
|
||||||
results = find_record(path, ensemble, corr, code)
|
results = find_record(path, ensemble, corr, code)
|
||||||
if arg == 'all':
|
|
||||||
print(results)
|
print(results)
|
||||||
else:
|
|
||||||
for r in results[arg].values:
|
|
||||||
print(r)
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
|
||||||
def stat(
|
|
||||||
path: str = typer.Option(
|
|
||||||
str('./corrlib'),
|
|
||||||
"--dataset",
|
|
||||||
"-d",
|
|
||||||
),
|
|
||||||
record: str = typer.Argument(),
|
|
||||||
) -> None:
|
|
||||||
"""
|
|
||||||
Show the statistics of a given record.
|
|
||||||
"""
|
|
||||||
record = mio_load_record(path, record)[0]
|
|
||||||
statistics = record.idl
|
|
||||||
print(statistics)
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
|
|
@ -209,7 +180,7 @@ def drop_cache(
|
||||||
"""
|
"""
|
||||||
Drop the currect cache directory of the dataset.
|
Drop the currect cache directory of the dataset.
|
||||||
"""
|
"""
|
||||||
mio_drop_cache(path)
|
cio_drop_cache_files(path)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import json
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .input.implementations import codes
|
from .input.implementations import codes
|
||||||
from .tools import k2m, get_db_file
|
from .tools import k2m, db_filename
|
||||||
from .tracker import get
|
from .tracker import get
|
||||||
from typing import Any, Optional
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
|
@ -230,7 +230,7 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
|
||||||
|
|
||||||
def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None,
|
def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None,
|
||||||
created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame:
|
created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame:
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
db = os.path.join(path, db_file)
|
db = os.path.join(path, db_file)
|
||||||
if code not in codes:
|
if code not in codes:
|
||||||
raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes))
|
raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes))
|
||||||
|
|
@ -262,7 +262,7 @@ def find_project(path: str, name: str) -> str:
|
||||||
uuid: str
|
uuid: str
|
||||||
The uuid of the project in question.
|
The uuid of the project in question.
|
||||||
"""
|
"""
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
return _project_lookup_by_alias(os.path.join(path, db_file), name)
|
return _project_lookup_by_alias(os.path.join(path, db_file), name)
|
||||||
|
|
||||||
|
|
@ -281,7 +281,7 @@ def list_projects(path: str) -> list[tuple[str, str]]:
|
||||||
results: list[Any]
|
results: list[Any]
|
||||||
The projects known to the library.
|
The projects known to the library.
|
||||||
"""
|
"""
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
conn = sqlite3.connect(os.path.join(path, db_file))
|
conn = sqlite3.connect(os.path.join(path, db_file))
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,8 @@ def _create_db(db: str) -> None:
|
||||||
parameters TEXT,
|
parameters TEXT,
|
||||||
parameter_file TEXT,
|
parameter_file TEXT,
|
||||||
created_at TEXT,
|
created_at TEXT,
|
||||||
updated_at TEXT)''')
|
updated_at TEXT,
|
||||||
|
current_version TEXT)''')
|
||||||
c.execute('''CREATE TABLE IF NOT EXISTS projects
|
c.execute('''CREATE TABLE IF NOT EXISTS projects
|
||||||
(id TEXT PRIMARY KEY,
|
(id TEXT PRIMARY KEY,
|
||||||
aliases TEXT,
|
aliases TEXT,
|
||||||
|
|
@ -71,6 +72,7 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser:
|
||||||
'archive_path': 'archive',
|
'archive_path': 'archive',
|
||||||
'toml_imports_path': 'toml_imports',
|
'toml_imports_path': 'toml_imports',
|
||||||
'import_scripts_path': 'import_scripts',
|
'import_scripts_path': 'import_scripts',
|
||||||
|
'cache_path': '.cache',
|
||||||
}
|
}
|
||||||
return config
|
return config
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,6 @@ import datalad.api as dl
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from fnmatch import fnmatch
|
|
||||||
|
|
||||||
|
|
||||||
bi_corrs: list[str] = ["f_P", "fP", "f_p",
|
bi_corrs: list[str] = ["f_P", "fP", "f_p",
|
||||||
|
|
@ -299,7 +298,6 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param:
|
||||||
if not appended:
|
if not appended:
|
||||||
compact = (version[-1] == "c")
|
compact = (version[-1] == "c")
|
||||||
for i, item in enumerate(ls):
|
for i, item in enumerate(ls):
|
||||||
if fnmatch(item, prefix + "*"):
|
|
||||||
rep_path = directory + '/' + item
|
rep_path = directory + '/' + item
|
||||||
sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, [])
|
sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, [])
|
||||||
files_to_get.extend([rep_path + "/" + filename for filename in sub_ls])
|
files_to_get.extend([rep_path + "/" + filename for filename in sub_ls])
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ import os
|
||||||
from .git_tools import move_submodule
|
from .git_tools import move_submodule
|
||||||
import shutil
|
import shutil
|
||||||
from .find import _project_lookup_by_id
|
from .find import _project_lookup_by_id
|
||||||
from .tools import list2str, str2list, get_db_file
|
from .tools import list2str, str2list, db_filename
|
||||||
from .tracker import get, save, unlock, clone, drop
|
from .tracker import get, save, unlock, clone, drop
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
|
|
||||||
|
|
@ -25,7 +25,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni
|
||||||
code: str (optional)
|
code: str (optional)
|
||||||
The code that was used to create the measurements.
|
The code that was used to create the measurements.
|
||||||
"""
|
"""
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
db = os.path.join(path, db_file)
|
db = os.path.join(path, db_file)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
conn = sqlite3.connect(db)
|
conn = sqlite3.connect(db)
|
||||||
|
|
@ -64,7 +64,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None]
|
||||||
value: str or None
|
value: str or None
|
||||||
Value to se `prop` to.
|
Value to se `prop` to.
|
||||||
"""
|
"""
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
conn = sqlite3.connect(os.path.join(path, db_file))
|
conn = sqlite3.connect(os.path.join(path, db_file))
|
||||||
c = conn.cursor()
|
c = conn.cursor()
|
||||||
|
|
@ -75,7 +75,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None]
|
||||||
|
|
||||||
|
|
||||||
def update_aliases(path: str, uuid: str, aliases: list[str]) -> None:
|
def update_aliases(path: str, uuid: str, aliases: list[str]) -> None:
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
db = os.path.join(path, db_file)
|
db = os.path.join(path, db_file)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
known_data = _project_lookup_by_id(db, uuid)[0]
|
known_data = _project_lookup_by_id(db, uuid)[0]
|
||||||
|
|
@ -135,7 +135,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti
|
||||||
if not uuid:
|
if not uuid:
|
||||||
raise ValueError("The dataset does not have a uuid!")
|
raise ValueError("The dataset does not have a uuid!")
|
||||||
if not os.path.exists(path + "/projects/" + uuid):
|
if not os.path.exists(path + "/projects/" + uuid):
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
unlock(path, db_file)
|
unlock(path, db_file)
|
||||||
create_project(path, uuid, owner, tags, aliases, code)
|
create_project(path, uuid, owner, tags, aliases, code)
|
||||||
|
|
|
||||||
|
|
@ -3,13 +3,14 @@ import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from .input import sfcf,openQCD
|
from .input import sfcf,openQCD
|
||||||
import json
|
import json
|
||||||
from typing import Union
|
from typing import Union, Any
|
||||||
from pyerrors import Obs, Corr, dump_object, load_object
|
from pyerrors import Obs, Corr, load_object, dump_object
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from .tools import get_db_file, cache_enabled
|
from .tools import record2name_key, name_key2record, make_version_hash
|
||||||
|
from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash
|
||||||
|
from .tools import db_filename, cache_enabled
|
||||||
from .tracker import get, save, unlock
|
from .tracker import get, save, unlock
|
||||||
import shutil
|
import shutil
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
|
|
||||||
def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None:
|
def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None:
|
||||||
|
|
@ -32,7 +33,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
|
||||||
parameter_file: str
|
parameter_file: str
|
||||||
The parameter file used for the measurement.
|
The parameter file used for the measurement.
|
||||||
"""
|
"""
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
db = os.path.join(path, db_file)
|
db = os.path.join(path, db_file)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
unlock(path, db_file)
|
unlock(path, db_file)
|
||||||
|
|
@ -85,18 +86,18 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
|
||||||
subkey = "/".join(par_list)
|
subkey = "/".join(par_list)
|
||||||
subkeys = [subkey]
|
subkeys = [subkey]
|
||||||
pars[subkey] = json.dumps(parameters)
|
pars[subkey] = json.dumps(parameters)
|
||||||
|
|
||||||
|
meas_paths = []
|
||||||
for subkey in subkeys:
|
for subkey in subkeys:
|
||||||
parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
|
par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
|
||||||
meas_path = file_in_archive + "::" + parHash
|
meas_path = name_key2record(file_in_archive, par_hash)
|
||||||
|
meas_paths.append(meas_path)
|
||||||
known_meas[parHash] = measurement[corr][subkey]
|
known_meas[par_hash] = measurement[corr][subkey]
|
||||||
|
data_hash = make_version_hash(path, meas_path)
|
||||||
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
|
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None:
|
||||||
c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
|
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))",
|
||||||
else:
|
|
||||||
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
|
|
||||||
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
|
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
|
||||||
conn.commit()
|
c.execute("UPDATE backlogs SET current_version = ?, updated_at = datetime('now') WHERE path = ?", (data_hash, meas_path))
|
||||||
pj.dump_dict_to_json(known_meas, file)
|
pj.dump_dict_to_json(known_meas, file)
|
||||||
files.append(os.path.join(path, db_file))
|
files.append(os.path.join(path, db_file))
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
@ -123,7 +124,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]:
|
||||||
return load_records(path, [meas_path])[0]
|
return load_records(path, [meas_path])[0]
|
||||||
|
|
||||||
|
|
||||||
def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]:
|
def load_records(path: str, record_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]:
|
||||||
"""
|
"""
|
||||||
Load a list of records by their paths.
|
Load a list of records by their paths.
|
||||||
|
|
||||||
|
|
@ -142,70 +143,32 @@ def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {
|
||||||
The loaded records.
|
The loaded records.
|
||||||
"""
|
"""
|
||||||
needed_data: dict[str, list[str]] = {}
|
needed_data: dict[str, list[str]] = {}
|
||||||
for mpath in meas_paths:
|
for rpath in record_paths:
|
||||||
file = mpath.split("::")[0]
|
file, key = record2name_key(rpath)
|
||||||
if file not in needed_data.keys():
|
if file not in needed_data.keys():
|
||||||
needed_data[file] = []
|
needed_data[file] = []
|
||||||
key = mpath.split("::")[1]
|
|
||||||
needed_data[file].append(key)
|
needed_data[file].append(key)
|
||||||
returned_data: list[Any] = []
|
returned_data: list[Any] = []
|
||||||
for file in needed_data.keys():
|
for file in needed_data.keys():
|
||||||
for key in list(needed_data[file]):
|
for key in list(needed_data[file]):
|
||||||
if os.path.exists(cache_path(path, file, key) + ".p"):
|
record = name_key2record(file, key)
|
||||||
returned_data.append(load_object(cache_path(path, file, key) + ".p"))
|
current_version = get_version_hash(path, record)
|
||||||
|
if is_in_cache(path, record):
|
||||||
|
returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p"))
|
||||||
else:
|
else:
|
||||||
if file not in preloaded:
|
if file not in preloaded:
|
||||||
preloaded[file] = preload(path, file)
|
preloaded[file] = preload(path, file)
|
||||||
returned_data.append(preloaded[file][key])
|
returned_data.append(preloaded[file][key])
|
||||||
if cache_enabled(path):
|
if cache_enabled(path):
|
||||||
|
if not is_in_cache(path, record):
|
||||||
|
file, key = record2name_key(record)
|
||||||
if not os.path.exists(cache_dir(path, file)):
|
if not os.path.exists(cache_dir(path, file)):
|
||||||
os.makedirs(cache_dir(path, file))
|
os.makedirs(cache_dir(path, file))
|
||||||
dump_object(preloaded[file][key], cache_path(path, file, key))
|
current_version = get_version_hash(path, record)
|
||||||
|
dump_object(preloaded[file][key], cache_path(path, file, current_version, key))
|
||||||
return returned_data
|
return returned_data
|
||||||
|
|
||||||
|
|
||||||
def cache_dir(path: str, file: str) -> str:
|
|
||||||
"""
|
|
||||||
Returns the directory corresponding to the cache for the given file.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
path: str
|
|
||||||
The path of the library.
|
|
||||||
file: str
|
|
||||||
The file in the library that we want to access the cached data of.
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
cache_path: str
|
|
||||||
The path holding the cached data for the given file.
|
|
||||||
"""
|
|
||||||
cache_path_list = [path]
|
|
||||||
cache_path_list.append(".cache")
|
|
||||||
cache_path_list.extend(file.split("/")[1:])
|
|
||||||
cache_path = "/".join(cache_path_list)
|
|
||||||
return cache_path
|
|
||||||
|
|
||||||
|
|
||||||
def cache_path(path: str, file: str, key: str) -> str:
|
|
||||||
"""
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
path: str
|
|
||||||
The path of the library.
|
|
||||||
file: str
|
|
||||||
The file in the library that we want to access the cached data of.
|
|
||||||
key: str
|
|
||||||
The key within the archive file.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
cache_path: str
|
|
||||||
The path at which the measurement of the given file and key is cached.
|
|
||||||
"""
|
|
||||||
cache_path = os.path.join(cache_dir(path, file), key)
|
|
||||||
return cache_path
|
|
||||||
|
|
||||||
|
|
||||||
def preload(path: str, file: str) -> dict[str, Any]:
|
def preload(path: str, file: str) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method.
|
Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method.
|
||||||
|
|
@ -241,7 +204,7 @@ def drop_record(path: str, meas_path: str) -> None:
|
||||||
"""
|
"""
|
||||||
file_in_archive = meas_path.split("::")[0]
|
file_in_archive = meas_path.split("::")[0]
|
||||||
file = os.path.join(path, file_in_archive)
|
file = os.path.join(path, file_in_archive)
|
||||||
db_file = get_db_file(path)
|
db_file = db_filename(path)
|
||||||
db = os.path.join(path, db_file)
|
db = os.path.join(path, db_file)
|
||||||
get(path, db_file)
|
get(path, db_file)
|
||||||
sub_key = meas_path.split("::")[1]
|
sub_key = meas_path.split("::")[1]
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import os
|
import os
|
||||||
|
import hashlib
|
||||||
from configparser import ConfigParser
|
from configparser import ConfigParser
|
||||||
from typing import Any
|
from typing import Any, Union
|
||||||
|
|
||||||
CONFIG_FILENAME = ".corrlib"
|
CONFIG_FILENAME = ".corrlib"
|
||||||
cached: bool = True
|
cached: bool = True
|
||||||
|
|
@ -22,6 +23,7 @@ def str2list(string: str) -> list[str]:
|
||||||
"""
|
"""
|
||||||
return string.split(",")
|
return string.split(",")
|
||||||
|
|
||||||
|
|
||||||
def list2str(mylist: list[str]) -> str:
|
def list2str(mylist: list[str]) -> str:
|
||||||
"""
|
"""
|
||||||
Convert a list to a comma-separated string.
|
Convert a list to a comma-separated string.
|
||||||
|
|
@ -39,6 +41,7 @@ def list2str(mylist: list[str]) -> str:
|
||||||
s = ",".join(mylist)
|
s = ",".join(mylist)
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
def m2k(m: float) -> float:
|
def m2k(m: float) -> float:
|
||||||
"""
|
"""
|
||||||
Convert to bare quark mas $m$ to inverse mass parameter $kappa$.
|
Convert to bare quark mas $m$ to inverse mass parameter $kappa$.
|
||||||
|
|
@ -73,6 +76,47 @@ def k2m(k: float) -> float:
|
||||||
return (1/(2*k))-4
|
return (1/(2*k))-4
|
||||||
|
|
||||||
|
|
||||||
|
def record2name_key(record_path: str) -> tuple[str, str]:
    """
    Split a record path of the form "<name>::<key>" into its name and its key.

    Parameters
    ----------
    record_path: str
        The record path.

    Returns
    -------
    name: str
        The part in front of the first "::".
    key: str
        The part between the first and a possible second "::".

    Raises
    ------
    IndexError
        If the record path does not contain "::".
    """
    parts = record_path.split("::")
    return parts[0], parts[1]
|
||||||
|
|
||||||
|
|
||||||
|
def name_key2record(name: str, key: str) -> str:
    """
    Combine a name and a key into a record path.

    Parameters
    ----------
    name: str
        The part in front of the separator.
    key: str
        The part behind the separator.

    Returns
    -------
    record: str
        The name and the key, joined by "::".
    """
    return "::".join((name, key))
|
||||||
|
|
||||||
|
|
||||||
|
def make_version_hash(path: str, record: str) -> str:
    """
    Compute the SHA-1 hex digest of the file that holds the given record.

    The digest covers the whole file, the key of the record does not enter it.

    Parameters
    ----------
    path: str
        The path of the library.
    record: str
        The record path; only its file part is used.

    Returns
    -------
    file_hash: str
        The hexadecimal SHA-1 digest of the file content.
    """
    file, _ = record2name_key(record)
    digest = hashlib.sha1()
    with open(os.path.join(path, file), 'rb') as fp:
        # Chunked reading gives the same digest as hashlib.file_digest,
        # but does not require Python 3.11.
        for chunk in iter(lambda: fp.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def set_config(path: str, section: str, option: str, value: Any) -> None:
|
def set_config(path: str, section: str, option: str, value: Any) -> None:
|
||||||
"""
|
"""
|
||||||
Set configuration parameters for the library.
|
Set configuration parameters for the library.
|
||||||
|
|
@ -100,7 +144,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def get_db_file(path: str) -> str:
|
def db_filename(path: str) -> str:
|
||||||
"""
|
"""
|
||||||
Get the database file associated with the library at the given path.
|
Get the database file associated with the library at the given path.
|
||||||
|
|
||||||
|
|
@ -144,3 +188,28 @@ def cache_enabled(path: str) -> bool:
|
||||||
cached_str = config.get('core', 'cached', fallback='True')
|
cached_str = config.get('core', 'cached', fallback='True')
|
||||||
cached_bool = cached_str == ('True')
|
cached_bool = cached_str == ('True')
|
||||||
return cached_bool
|
return cached_bool
|
||||||
|
|
||||||
|
|
||||||
|
def cache_dir_name(path: str) -> Union[str, None]:
    """
    Get the name of the cache directory configured for the library at the given path.

    Parameters
    ----------
    path: str
        The path of the library.

    Returns
    -------
    cache: str or None
        The configured name of the cache directory ('.cache' if nothing is configured),
        or None if caching is disabled for the library.
    """
    config_path = os.path.join(path, CONFIG_FILENAME)
    config = ConfigParser()
    if os.path.exists(config_path):
        config.read(config_path)
    if not cache_enabled(path):
        return None
    # NOTE(review): the configuration defaults contain a 'cache_path' entry, presumably in the
    # 'paths' section -- confirm. It is consulted whenever the 'cache' option is not set.
    configured = config.get('paths', 'cache_path', fallback='.cache')
    return config.get('paths', 'cache', fallback=configured)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ from configparser import ConfigParser
|
||||||
import datalad.api as dl
|
import datalad.api as dl
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
import shutil
|
import shutil
|
||||||
from .tools import get_db_file
|
from .tools import db_filename
|
||||||
|
|
||||||
|
|
||||||
def get_tracker(path: str) -> str:
|
def get_tracker(path: str) -> str:
|
||||||
|
|
@ -43,7 +43,7 @@ def get(path: str, file: str) -> None:
|
||||||
"""
|
"""
|
||||||
tracker = get_tracker(path)
|
tracker = get_tracker(path)
|
||||||
if tracker == 'datalad':
|
if tracker == 'datalad':
|
||||||
if file == get_db_file(path):
|
if file == db_filename(path):
|
||||||
print("Downloading database...")
|
print("Downloading database...")
|
||||||
else:
|
else:
|
||||||
print("Downloading data...")
|
print("Downloading data...")
|
||||||
|
|
|
||||||
18
setup.py
Normal file
18
setup.py
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
import os

from setuptools import setup

# Run corrlib/version.py in its own namespace to obtain __version__
# without importing the package (and thereby its dependencies).
# The path is built with os.path instead of distutils, which is no longer
# part of the standard library in recent Python versions.
version = {}
with open(os.path.join('corrlib', 'version.py')) as ver_file:
    exec(ver_file.read(), version)

setup(name='pycorrlib',
      version=version['__version__'],
      author='Justus Kuhlmann',
      author_email='j_kuhl19@uni-muenster.de',
      install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'],
      entry_points={
          # Installs the command line tool `pcl`, which runs the typer app of corrlib.cli.
          'console_scripts': ['pcl=corrlib.cli:app'],
      },
      packages=['corrlib', 'corrlib.input']
      )
|
||||||
|
|
@ -2,18 +2,19 @@ from typer.testing import CliRunner
|
||||||
from corrlib.cli import app
|
from corrlib.cli import app
|
||||||
import os
|
import os
|
||||||
import sqlite3 as sql
|
import sqlite3 as sql
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
runner = CliRunner()
|
runner = CliRunner()
|
||||||
|
|
||||||
|
|
||||||
def test_version():
|
def test_version() -> None:
|
||||||
result = runner.invoke(app, ["--version"])
|
result = runner.invoke(app, ["--version"])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
assert "corrlib" in result.output
|
assert "corrlib" in result.output
|
||||||
|
|
||||||
|
|
||||||
def test_init_folders(tmp_path):
|
def test_init_folders(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
@ -21,7 +22,7 @@ def test_init_folders(tmp_path):
|
||||||
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
||||||
|
|
||||||
|
|
||||||
def test_init_db(tmp_path):
|
def test_init_db(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
@ -81,7 +82,7 @@ def test_init_db(tmp_path):
|
||||||
assert expected_col in backlog_column_names
|
assert expected_col in backlog_column_names
|
||||||
|
|
||||||
|
|
||||||
def test_list(tmp_path):
|
def test_list(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
|
||||||
assert result.exit_code == 0
|
assert result.exit_code == 0
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import corrlib.toml as t
|
import corrlib.toml as t
|
||||||
|
|
||||||
|
|
||||||
def test_toml_check_measurement_data():
|
def test_toml_check_measurement_data() -> None:
|
||||||
measurements = {
|
measurements = {
|
||||||
"a":
|
"a":
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
import corrlib.input.sfcf as input
|
import corrlib.input.sfcf as input
|
||||||
import json
|
import json
|
||||||
|
|
||||||
def test_get_specs():
|
def test_get_specs() -> None:
|
||||||
parameters = {
|
parameters = {
|
||||||
'crr': [
|
'crr': [
|
||||||
'f_P', 'f_A'
|
'f_P', 'f_A'
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,23 @@
|
||||||
import corrlib.initialization as init
|
import corrlib.initialization as init
|
||||||
import os
|
import os
|
||||||
import sqlite3 as sql
|
import sqlite3 as sql
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
def test_init_folders(tmp_path):
|
def test_init_folders(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
init.create(str(dataset_path))
|
init.create(str(dataset_path))
|
||||||
assert os.path.exists(str(dataset_path))
|
assert os.path.exists(str(dataset_path))
|
||||||
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
||||||
|
|
||||||
|
|
||||||
def test_init_folders_no_tracker(tmp_path):
|
def test_init_folders_no_tracker(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
init.create(str(dataset_path), tracker="None")
|
init.create(str(dataset_path), tracker="None")
|
||||||
assert os.path.exists(str(dataset_path))
|
assert os.path.exists(str(dataset_path))
|
||||||
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
||||||
|
|
||||||
|
|
||||||
def test_init_config(tmp_path):
|
def test_init_config(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
init.create(str(dataset_path), tracker="None")
|
init.create(str(dataset_path), tracker="None")
|
||||||
config_path = dataset_path / ".corrlib"
|
config_path = dataset_path / ".corrlib"
|
||||||
|
|
@ -34,7 +35,7 @@ def test_init_config(tmp_path):
|
||||||
assert config.get("paths", "import_scripts_path") == "import_scripts"
|
assert config.get("paths", "import_scripts_path") == "import_scripts"
|
||||||
|
|
||||||
|
|
||||||
def test_init_db(tmp_path):
|
def test_init_db(tmp_path: Path) -> None:
|
||||||
dataset_path = tmp_path / "test_dataset"
|
dataset_path = tmp_path / "test_dataset"
|
||||||
init.create(str(dataset_path))
|
init.create(str(dataset_path))
|
||||||
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
assert os.path.exists(str(dataset_path / "backlogger.db"))
|
||||||
|
|
|
||||||
|
|
@ -3,29 +3,29 @@
|
||||||
from corrlib import tools as tl
|
from corrlib import tools as tl
|
||||||
|
|
||||||
|
|
||||||
def test_m2k():
|
def test_m2k() -> None:
|
||||||
for m in [0.1, 0.5, 1.0]:
|
for m in [0.1, 0.5, 1.0]:
|
||||||
expected_k = 1 / (2 * m + 8)
|
expected_k = 1 / (2 * m + 8)
|
||||||
assert tl.m2k(m) == expected_k
|
assert tl.m2k(m) == expected_k
|
||||||
|
|
||||||
|
|
||||||
def test_k2m():
|
def test_k2m() -> None:
|
||||||
for m in [0.1, 0.5, 1.0]:
|
for m in [0.1, 0.5, 1.0]:
|
||||||
assert tl.k2m(m) == (1/(2*m))-4
|
assert tl.k2m(m) == (1/(2*m))-4
|
||||||
|
|
||||||
|
|
||||||
def test_k2m_m2k():
|
def test_k2m_m2k() -> None:
|
||||||
for m in [0.1, 0.5, 1.0]:
|
for m in [0.1, 0.5, 1.0]:
|
||||||
k = tl.m2k(m)
|
k = tl.m2k(m)
|
||||||
m_converted = tl.k2m(k)
|
m_converted = tl.k2m(k)
|
||||||
assert abs(m - m_converted) < 1e-9
|
assert abs(m - m_converted) < 1e-9
|
||||||
|
|
||||||
|
|
||||||
def test_str2list():
|
def test_str2list() -> None:
|
||||||
assert tl.str2list("a,b,c") == ["a", "b", "c"]
|
assert tl.str2list("a,b,c") == ["a", "b", "c"]
|
||||||
assert tl.str2list("1,2,3") == ["1", "2", "3"]
|
assert tl.str2list("1,2,3") == ["1", "2", "3"]
|
||||||
|
|
||||||
|
|
||||||
def test_list2str():
|
def test_list2str() -> None:
|
||||||
assert tl.list2str(["a", "b", "c"]) == "a,b,c"
|
assert tl.list2str(["a", "b", "c"]) == "a,b,c"
|
||||||
assert tl.list2str(["1", "2", "3"]) == "1,2,3"
|
assert tl.list2str(["1", "2", "3"]) == "1,2,3"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue