Compare commits

..

4 commits

Author SHA1 Message Date
314234fed8 Merge pull request 'fix the file finder for sfcf' (#19) from fix/sfcf_file_finder into develop
Some checks failed
Mypy / mypy (push) Failing after 1s
Pytest / pytest (3.12) (push) Failing after 1s
Pytest / pytest (3.13) (push) Failing after 1s
Pytest / pytest (3.14) (push) Failing after 2s
Ruff / ruff (push) Failing after 1s
Reviewed-on: https://www.kuhl-mann.de/git/git/jkuhl/corrlib/pulls/19
2026-02-20 16:55:54 +01:00
60b56dfb25
fix the file finder for sfcf
Some checks failed
Mypy / mypy (push) Failing after 2s
Pytest / pytest (3.12) (push) Failing after 2s
Pytest / pytest (3.13) (push) Failing after 2s
Pytest / pytest (3.14) (push) Failing after 2s
Ruff / ruff (push) Failing after 2s
Mypy / mypy (pull_request) Failing after 2s
Pytest / pytest (3.12) (pull_request) Failing after 2s
Pytest / pytest (3.13) (pull_request) Failing after 2s
Pytest / pytest (3.14) (pull_request) Failing after 1s
Ruff / ruff (pull_request) Failing after 1s
2026-02-20 16:54:14 +01:00
06b07bc590 Merge pull request 'add a simple method to show the statistics of a record' (#18) from feat/cli_stat into develop
Some checks failed
Mypy / mypy (push) Failing after 2s
Pytest / pytest (3.14) (push) Failing after 1s
Pytest / pytest (3.12) (push) Failing after 2s
Pytest / pytest (3.13) (push) Failing after 2s
Ruff / ruff (push) Failing after 1s
Reviewed-on: https://www.kuhl-mann.de/git/git/jkuhl/corrlib/pulls/18
2026-02-20 10:19:33 +01:00
0d6ad8f552
add a simple method to show the statistics of a record
Some checks failed
Pytest / pytest (3.12) (push) Failing after 2s
Pytest / pytest (3.12) (pull_request) Failing after 2s
Pytest / pytest (3.13) (pull_request) Failing after 2s
Pytest / pytest (3.14) (pull_request) Failing after 2s
Ruff / ruff (pull_request) Failing after 2s
Mypy / mypy (push) Failing after 2s
Pytest / pytest (3.13) (push) Failing after 2s
Pytest / pytest (3.14) (push) Failing after 2s
Ruff / ruff (push) Failing after 2s
Mypy / mypy (pull_request) Failing after 2s
2026-02-20 10:17:15 +01:00
19 changed files with 145 additions and 244 deletions

1
.gitignore vendored
View file

@ -2,7 +2,6 @@ pyerrors_corrlib.egg-info
__pycache__ __pycache__
*.egg-info *.egg-info
test.ipynb test.ipynb
test_ds
.vscode .vscode
.venv .venv
.pytest_cache .pytest_cache

View file

@ -5,12 +5,3 @@ This is done in a reproducible way using `datalad`.
In principle, a dataset is created, that is automatically administered by the backlogger, in which data from different projects are held together. In principle, a dataset is created, that is automatically administered by the backlogger, in which data from different projects are held together.
Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements. Everything is catalogued by a searchable SQL database, which holds the paths to the respective measurements.
The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methods of pyerrors. The original projects can be linked to the dataset and the data may be imported using wrapper functions around the read methods of pyerrors.
We work with the following nomenclature in this project:
- Measurement
A set of Observables, including the appropriate metadata.
- Project
A series of measurements that was done by one person as part of their research.
- Record
An entry of a single Correlator in the database of the backlogger.
-

25
TODO.md
View file

@ -1,21 +1,14 @@
# TODO # TODO
## Features ## Features
- [ ] implement import of non-datalad projects - implement import of non-datalad projects
- [ ] implement a way to use another backlog repo as a project - implement a way to use another backlog repo as a project
- [ ] make cache deadlock resistent (no read while writing)
- [ ] find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project - find a way to convey the mathematical structure of what EXACTLY is the form of the correlator in a specific project
- [ ] this could e.g. be done along the lines of mandatory documentation - this could e.g. be done along the lines of mandatory documentation
- [ ] keep better track of the versions of the code, that was used for a specific measurement. - keep better track of the versions of the code, that was used for a specific measurement.
- [ ] maybe let this be an input in the project file? - maybe let this be an input in the project file?
- [ ] git repo and commit hash/version tag - git repo and commit hash/version tag
- [ ] implement a code table?
- [ ] parallel processing of measurements
- [ ] extra SQL table for ensembles with UUID and aliases
## Bugfixes ## Bugfixes
- [ ] revisit the reimport function for single files - [ ] revisit the reimport function for single files
- [ ] drop record needs to look if no records are left in a json file.
## Rough Ideas
- [ ] multitable could provide a high speed implementation of an HDF5 based format
- [ ] implement also a way to include compiled binaries in the archives.

View file

@ -22,4 +22,3 @@ from .meas_io import load_records as load_records
from .find import find_project as find_project from .find import find_project as find_project
from .find import find_record as find_record from .find import find_record as find_record
from .find import list_projects as list_projects from .find import list_projects as list_projects
from .tools import *

View file

@ -1,58 +0,0 @@
from typing import Optional
import os
import shutil
from .tools import record2name_key
import datalad.api as dl
import sqlite3
from tools import db_filename
def get_version_hash(path: str, record: str) -> str:
db = os.path.join(path, db_filename(path))
dl.get(db, dataset=path)
conn = sqlite3.connect(db)
c = conn.cursor()
c.execute(f"SELECT current_version FROM 'backlogs' WHERE path = '{record}'")
return str(c.fetchall()[0][0])
def drop_cache_files(path: str, fs: Optional[list[str]]=None) -> None:
cache_dir = os.path.join(path, ".cache")
if fs is None:
fs = os.listdir(cache_dir)
for f in fs:
shutil.rmtree(os.path.join(cache_dir, f))
def cache_dir(path: str, file: str) -> str:
cache_path_list = [path]
cache_path_list.append(".cache")
cache_path_list.extend(file.split("/")[1:])
cache_path = "/".join(cache_path_list)
return cache_path
def cache_path(path: str, file: str, sha_hash: str, key: str) -> str:
cache_path = os.path.join(cache_dir(path, file), key + "_" + sha_hash)
return cache_path
def is_old_version(path: str, record: str) -> bool:
version_hash = get_version_hash(path, record)
file, key = record2name_key(record)
meas_cache_path = os.path.join(cache_dir(path, file))
ls = []
is_old = True
for p, ds, fs in os.walk(meas_cache_path):
ls.extend(fs)
for filename in ls:
if key == filename.split("_")[0]:
if version_hash == filename.split("_")[1][:-2]:
is_old = False
return is_old
def is_in_cache(path: str, record: str) -> bool:
version_hash = get_version_hash(path, record)
file, key = record2name_key(record)
return os.path.exists(cache_path(path, file, version_hash, key) + ".p")

View file

@ -6,7 +6,8 @@ from .toml import import_tomls, update_project, reimport_project
from .find import find_record, list_projects from .find import find_record, list_projects
from .tools import str2list from .tools import str2list
from .main import update_aliases from .main import update_aliases
from .cache_io import drop_cache_files as cio_drop_cache_files from .meas_io import drop_cache as mio_drop_cache
from .meas_io import load_record as mio_load_record
import os import os
from importlib.metadata import version from importlib.metadata import version
@ -35,6 +36,7 @@ def update(
update_project(path, uuid) update_project(path, uuid)
return return
@app.command() @app.command()
def list( def list(
path: str = typer.Option( path: str = typer.Option(
@ -94,12 +96,39 @@ def find(
ensemble: str = typer.Argument(), ensemble: str = typer.Argument(),
corr: str = typer.Argument(), corr: str = typer.Argument(),
code: str = typer.Argument(), code: str = typer.Argument(),
arg: str = typer.Option(
str('all'),
"--argument",
"-a",
),
) -> None: ) -> None:
""" """
Find a record in the backlog at hand by specifying its ensemble and the measured correlator. Find a record in the backlog at hand by specifying its ensemble and the measured correlator.
""" """
results = find_record(path, ensemble, corr, code) results = find_record(path, ensemble, corr, code)
if arg == 'all':
print(results) print(results)
else:
for r in results[arg].values:
print(r)
@app.command()
def stat(
path: str = typer.Option(
str('./corrlib'),
"--dataset",
"-d",
),
record: str = typer.Argument(),
) -> None:
"""
Show the statistics of a given record.
"""
record = mio_load_record(path, record)[0]
statistics = record.idl
print(statistics)
return
@app.command() @app.command()
@ -180,7 +209,7 @@ def drop_cache(
""" """
Drop the current cache directory of the dataset. Drop the current cache directory of the dataset.
""" """
cio_drop_cache_files(path) mio_drop_cache(path)
return return

View file

@ -4,7 +4,7 @@ import json
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from .input.implementations import codes from .input.implementations import codes
from .tools import k2m, db_filename from .tools import k2m, get_db_file
from .tracker import get from .tracker import get
from typing import Any, Optional from typing import Any, Optional
@ -230,7 +230,7 @@ def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None, def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None,
created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame: created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame:
db_file = db_filename(path) db_file = get_db_file(path)
db = os.path.join(path, db_file) db = os.path.join(path, db_file)
if code not in codes: if code not in codes:
raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes)) raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes))
@ -262,7 +262,7 @@ def find_project(path: str, name: str) -> str:
uuid: str uuid: str
The uuid of the project in question. The uuid of the project in question.
""" """
db_file = db_filename(path) db_file = get_db_file(path)
get(path, db_file) get(path, db_file)
return _project_lookup_by_alias(os.path.join(path, db_file), name) return _project_lookup_by_alias(os.path.join(path, db_file), name)
@ -281,7 +281,7 @@ def list_projects(path: str) -> list[tuple[str, str]]:
results: list[Any] results: list[Any]
The projects known to the library. The projects known to the library.
""" """
db_file = db_filename(path) db_file = get_db_file(path)
get(path, db_file) get(path, db_file)
conn = sqlite3.connect(os.path.join(path, db_file)) conn = sqlite3.connect(os.path.join(path, db_file))
c = conn.cursor() c = conn.cursor()

View file

@ -26,8 +26,7 @@ def _create_db(db: str) -> None:
parameters TEXT, parameters TEXT,
parameter_file TEXT, parameter_file TEXT,
created_at TEXT, created_at TEXT,
updated_at TEXT, updated_at TEXT)''')
current_version TEXT)''')
c.execute('''CREATE TABLE IF NOT EXISTS projects c.execute('''CREATE TABLE IF NOT EXISTS projects
(id TEXT PRIMARY KEY, (id TEXT PRIMARY KEY,
aliases TEXT, aliases TEXT,
@ -72,7 +71,6 @@ def _create_config(path: str, tracker: str, cached: bool) -> ConfigParser:
'archive_path': 'archive', 'archive_path': 'archive',
'toml_imports_path': 'toml_imports', 'toml_imports_path': 'toml_imports',
'import_scripts_path': 'import_scripts', 'import_scripts_path': 'import_scripts',
'cache_path': '.cache',
} }
return config return config

View file

@ -3,6 +3,7 @@ import datalad.api as dl
import json import json
import os import os
from typing import Any from typing import Any
from fnmatch import fnmatch
bi_corrs: list[str] = ["f_P", "fP", "f_p", bi_corrs: list[str] = ["f_P", "fP", "f_p",
@ -298,6 +299,7 @@ def read_data(path: str, project: str, dir_in_project: str, prefix: str, param:
if not appended: if not appended:
compact = (version[-1] == "c") compact = (version[-1] == "c")
for i, item in enumerate(ls): for i, item in enumerate(ls):
if fnmatch(item, prefix + "*"):
rep_path = directory + '/' + item rep_path = directory + '/' + item
sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, []) sub_ls = pe.input.sfcf._find_files(rep_path, prefix, compact, [])
files_to_get.extend([rep_path + "/" + filename for filename in sub_ls]) files_to_get.extend([rep_path + "/" + filename for filename in sub_ls])

View file

@ -5,7 +5,7 @@ import os
from .git_tools import move_submodule from .git_tools import move_submodule
import shutil import shutil
from .find import _project_lookup_by_id from .find import _project_lookup_by_id
from .tools import list2str, str2list, db_filename from .tools import list2str, str2list, get_db_file
from .tracker import get, save, unlock, clone, drop from .tracker import get, save, unlock, clone, drop
from typing import Union, Optional from typing import Union, Optional
@ -25,7 +25,7 @@ def create_project(path: str, uuid: str, owner: Union[str, None]=None, tags: Uni
code: str (optional) code: str (optional)
The code that was used to create the measurements. The code that was used to create the measurements.
""" """
db_file = db_filename(path) db_file = get_db_file(path)
db = os.path.join(path, db_file) db = os.path.join(path, db_file)
get(path, db_file) get(path, db_file)
conn = sqlite3.connect(db) conn = sqlite3.connect(db)
@ -64,7 +64,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None]
value: str or None value: str or None
Value to set `prop` to. Value to set `prop` to.
""" """
db_file = db_filename(path) db_file = get_db_file(path)
get(path, db_file) get(path, db_file)
conn = sqlite3.connect(os.path.join(path, db_file)) conn = sqlite3.connect(os.path.join(path, db_file))
c = conn.cursor() c = conn.cursor()
@ -75,7 +75,7 @@ def update_project_data(path: str, uuid: str, prop: str, value: Union[str, None]
def update_aliases(path: str, uuid: str, aliases: list[str]) -> None: def update_aliases(path: str, uuid: str, aliases: list[str]) -> None:
db_file = db_filename(path) db_file = get_db_file(path)
db = os.path.join(path, db_file) db = os.path.join(path, db_file)
get(path, db_file) get(path, db_file)
known_data = _project_lookup_by_id(db, uuid)[0] known_data = _project_lookup_by_id(db, uuid)[0]
@ -135,7 +135,7 @@ def import_project(path: str, url: str, owner: Union[str, None]=None, tags: Opti
if not uuid: if not uuid:
raise ValueError("The dataset does not have a uuid!") raise ValueError("The dataset does not have a uuid!")
if not os.path.exists(path + "/projects/" + uuid): if not os.path.exists(path + "/projects/" + uuid):
db_file = db_filename(path) db_file = get_db_file(path)
get(path, db_file) get(path, db_file)
unlock(path, db_file) unlock(path, db_file)
create_project(path, uuid, owner, tags, aliases, code) create_project(path, uuid, owner, tags, aliases, code)

View file

@ -3,14 +3,13 @@ import os
import sqlite3 import sqlite3
from .input import sfcf,openQCD from .input import sfcf,openQCD
import json import json
from typing import Union, Any from typing import Union
from pyerrors import Obs, Corr, load_object, dump_object from pyerrors import Obs, Corr, dump_object, load_object
from hashlib import sha256 from hashlib import sha256
from .tools import record2name_key, name_key2record, make_version_hash from .tools import get_db_file, cache_enabled
from .cache_io import is_in_cache, cache_path, cache_dir, get_version_hash
from .tools import db_filename, cache_enabled
from .tracker import get, save, unlock from .tracker import get, save, unlock
import shutil import shutil
from typing import Any
def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None: def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str, dict[str, Any]]], uuid: str, code: str, parameter_file: str) -> None:
@ -33,7 +32,7 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
parameter_file: str parameter_file: str
The parameter file used for the measurement. The parameter file used for the measurement.
""" """
db_file = db_filename(path) db_file = get_db_file(path)
db = os.path.join(path, db_file) db = os.path.join(path, db_file)
get(path, db_file) get(path, db_file)
unlock(path, db_file) unlock(path, db_file)
@ -86,18 +85,18 @@ def write_measurement(path: str, ensemble: str, measurement: dict[str, dict[str,
subkey = "/".join(par_list) subkey = "/".join(par_list)
subkeys = [subkey] subkeys = [subkey]
pars[subkey] = json.dumps(parameters) pars[subkey] = json.dumps(parameters)
meas_paths = []
for subkey in subkeys: for subkey in subkeys:
par_hash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest() parHash = sha256(str(pars[subkey]).encode('UTF-8')).hexdigest()
meas_path = name_key2record(file_in_archive, par_hash) meas_path = file_in_archive + "::" + parHash
meas_paths.append(meas_path)
known_meas[par_hash] = measurement[corr][subkey] known_meas[parHash] = measurement[corr][subkey]
data_hash = make_version_hash(path, meas_path)
if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is None: if c.execute("SELECT * FROM backlogs WHERE path = ?", (meas_path,)).fetchone() is not None:
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'))", c.execute("UPDATE backlogs SET updated_at = datetime('now') WHERE path = ?", (meas_path, ))
else:
c.execute("INSERT INTO backlogs (name, ensemble, code, path, project, parameters, parameter_file, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, datetime('now'), datetime('now'))",
(corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file)) (corr, ensemble, code, meas_path, uuid, pars[subkey], parameter_file))
c.execute("UPDATE backlogs SET current_version = ?, updated_at = datetime('now') WHERE path = ?", (data_hash, meas_path)) conn.commit()
pj.dump_dict_to_json(known_meas, file) pj.dump_dict_to_json(known_meas, file)
files.append(os.path.join(path, db_file)) files.append(os.path.join(path, db_file))
conn.close() conn.close()
@ -124,7 +123,7 @@ def load_record(path: str, meas_path: str) -> Union[Corr, Obs]:
return load_records(path, [meas_path])[0] return load_records(path, [meas_path])[0]
def load_records(path: str, record_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]: def load_records(path: str, meas_paths: list[str], preloaded: dict[str, Any] = {}) -> list[Union[Corr, Obs]]:
""" """
Load a list of records by their paths. Load a list of records by their paths.
@ -143,32 +142,70 @@ def load_records(path: str, record_paths: list[str], preloaded: dict[str, Any] =
The loaded records. The loaded records.
""" """
needed_data: dict[str, list[str]] = {} needed_data: dict[str, list[str]] = {}
for rpath in record_paths: for mpath in meas_paths:
file, key = record2name_key(rpath) file = mpath.split("::")[0]
if file not in needed_data.keys(): if file not in needed_data.keys():
needed_data[file] = [] needed_data[file] = []
key = mpath.split("::")[1]
needed_data[file].append(key) needed_data[file].append(key)
returned_data: list[Any] = [] returned_data: list[Any] = []
for file in needed_data.keys(): for file in needed_data.keys():
for key in list(needed_data[file]): for key in list(needed_data[file]):
record = name_key2record(file, key) if os.path.exists(cache_path(path, file, key) + ".p"):
current_version = get_version_hash(path, record) returned_data.append(load_object(cache_path(path, file, key) + ".p"))
if is_in_cache(path, record):
returned_data.append(load_object(cache_path(path, file, current_version, key) + ".p"))
else: else:
if file not in preloaded: if file not in preloaded:
preloaded[file] = preload(path, file) preloaded[file] = preload(path, file)
returned_data.append(preloaded[file][key]) returned_data.append(preloaded[file][key])
if cache_enabled(path): if cache_enabled(path):
if not is_in_cache(path, record):
file, key = record2name_key(record)
if not os.path.exists(cache_dir(path, file)): if not os.path.exists(cache_dir(path, file)):
os.makedirs(cache_dir(path, file)) os.makedirs(cache_dir(path, file))
current_version = get_version_hash(path, record) dump_object(preloaded[file][key], cache_path(path, file, key))
dump_object(preloaded[file][key], cache_path(path, file, current_version, key))
return returned_data return returned_data
def cache_dir(path: str, file: str) -> str:
"""
Returns the directory corresponding to the cache for the given file.
Parameters
----------
path: str
The path of the library.
file: str
The file in the library that we want to access the cached data of.
Returns
-------
cache_path: str
The path holding the cached data for the given file.
"""
cache_path_list = [path]
cache_path_list.append(".cache")
cache_path_list.extend(file.split("/")[1:])
cache_path = "/".join(cache_path_list)
return cache_path
def cache_path(path: str, file: str, key: str) -> str:
"""
Parameters
----------
path: str
The path of the library.
file: str
The file in the library that we want to access the cached data of.
key: str
The key within the archive file.
Returns
-------
cache_path: str
The path at which the measurement of the given file and key is cached.
"""
cache_path = os.path.join(cache_dir(path, file), key)
return cache_path
def preload(path: str, file: str) -> dict[str, Any]: def preload(path: str, file: str) -> dict[str, Any]:
""" """
Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method. Read the contents of a file into a json dictionary with the pyerrors.json.load_json_dict method.
@ -204,7 +241,7 @@ def drop_record(path: str, meas_path: str) -> None:
""" """
file_in_archive = meas_path.split("::")[0] file_in_archive = meas_path.split("::")[0]
file = os.path.join(path, file_in_archive) file = os.path.join(path, file_in_archive)
db_file = db_filename(path) db_file = get_db_file(path)
db = os.path.join(path, db_file) db = os.path.join(path, db_file)
get(path, db_file) get(path, db_file)
sub_key = meas_path.split("::")[1] sub_key = meas_path.split("::")[1]

View file

@ -1,7 +1,6 @@
import os import os
import hashlib
from configparser import ConfigParser from configparser import ConfigParser
from typing import Any, Union from typing import Any
CONFIG_FILENAME = ".corrlib" CONFIG_FILENAME = ".corrlib"
cached: bool = True cached: bool = True
@ -23,7 +22,6 @@ def str2list(string: str) -> list[str]:
""" """
return string.split(",") return string.split(",")
def list2str(mylist: list[str]) -> str: def list2str(mylist: list[str]) -> str:
""" """
Convert a list to a comma-separated string. Convert a list to a comma-separated string.
@ -41,7 +39,6 @@ def list2str(mylist: list[str]) -> str:
s = ",".join(mylist) s = ",".join(mylist)
return s return s
def m2k(m: float) -> float: def m2k(m: float) -> float:
""" """
Convert the bare quark mass $m$ to the inverse mass parameter $\kappa$. Convert the bare quark mass $m$ to the inverse mass parameter $\kappa$.
@ -76,47 +73,6 @@ def k2m(k: float) -> float:
return (1/(2*k))-4 return (1/(2*k))-4
def record2name_key(record_path: str) -> tuple[str, str]:
"""
Convert a record to a pair of name and key.
Parameters
----------
record: str
Returns
-------
name: str
key: str
"""
file = record_path.split("::")[0]
key = record_path.split("::")[1]
return file, key
def name_key2record(name: str, key: str) -> str:
"""
Convert a name and a key to a record name.
Parameters
----------
name: str
key: str
Returns
-------
record: str
"""
return name + "::" + key
def make_version_hash(path: str, record: str) -> str:
file, key = record2name_key(record)
with open(os.path.join(path, file), 'rb') as fp:
file_hash = hashlib.file_digest(fp, 'sha1').hexdigest()
return file_hash
def set_config(path: str, section: str, option: str, value: Any) -> None: def set_config(path: str, section: str, option: str, value: Any) -> None:
""" """
Set configuration parameters for the library. Set configuration parameters for the library.
@ -144,7 +100,7 @@ def set_config(path: str, section: str, option: str, value: Any) -> None:
return return
def db_filename(path: str) -> str: def get_db_file(path: str) -> str:
""" """
Get the database file associated with the library at the given path. Get the database file associated with the library at the given path.
@ -188,28 +144,3 @@ def cache_enabled(path: str) -> bool:
cached_str = config.get('core', 'cached', fallback='True') cached_str = config.get('core', 'cached', fallback='True')
cached_bool = cached_str == ('True') cached_bool = cached_str == ('True')
return cached_bool return cached_bool
def cache_dir_name(path: str) -> Union[str, None]:
"""
Get the database file associated with the library at the given path.
Parameters
----------
path: str
The path of the library.
Returns
-------
db_file: str
The file holding the database.
"""
config_path = os.path.join(path, CONFIG_FILENAME)
config = ConfigParser()
if os.path.exists(config_path):
config.read(config_path)
if cache_enabled(path):
cache = config.get('paths', 'cache', fallback='.cache')
else:
cache = None
return cache

View file

@ -3,7 +3,7 @@ from configparser import ConfigParser
import datalad.api as dl import datalad.api as dl
from typing import Optional from typing import Optional
import shutil import shutil
from .tools import db_filename from .tools import get_db_file
def get_tracker(path: str) -> str: def get_tracker(path: str) -> str:
@ -43,7 +43,7 @@ def get(path: str, file: str) -> None:
""" """
tracker = get_tracker(path) tracker = get_tracker(path)
if tracker == 'datalad': if tracker == 'datalad':
if file == db_filename(path): if file == get_db_file(path):
print("Downloading database...") print("Downloading database...")
else: else:
print("Downloading data...") print("Downloading data...")

View file

@ -1,18 +0,0 @@
from setuptools import setup
from distutils.util import convert_path
version = {}
with open(convert_path('corrlib/version.py')) as ver_file:
exec(ver_file.read(), version)
setup(name='pycorrlib',
version=version['__version__'],
author='Justus Kuhlmann',
author_email='j_kuhl19@uni-muenster.de',
install_requires=['pyerrors>=2.11.1', 'datalad>=1.1.0', 'typer>=0.12.5', 'gitpython>=3.1.45'],
entry_points = {
'console_scripts': ['pcl=corrlib.cli:app'],
},
packages=['corrlib', 'corrlib.input']
)

View file

@ -2,19 +2,18 @@ from typer.testing import CliRunner
from corrlib.cli import app from corrlib.cli import app
import os import os
import sqlite3 as sql import sqlite3 as sql
from pathlib import Path
runner = CliRunner() runner = CliRunner()
def test_version() -> None: def test_version():
result = runner.invoke(app, ["--version"]) result = runner.invoke(app, ["--version"])
assert result.exit_code == 0 assert result.exit_code == 0
assert "corrlib" in result.output assert "corrlib" in result.output
def test_init_folders(tmp_path: Path) -> None: def test_init_folders(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0 assert result.exit_code == 0
@ -22,7 +21,7 @@ def test_init_folders(tmp_path: Path) -> None:
assert os.path.exists(str(dataset_path / "backlogger.db")) assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_db(tmp_path: Path) -> None: def test_init_db(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0 assert result.exit_code == 0
@ -82,7 +81,7 @@ def test_init_db(tmp_path: Path) -> None:
assert expected_col in backlog_column_names assert expected_col in backlog_column_names
def test_list(tmp_path: Path) -> None: def test_list(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
result = runner.invoke(app, ["init", "--dataset", str(dataset_path)]) result = runner.invoke(app, ["init", "--dataset", str(dataset_path)])
assert result.exit_code == 0 assert result.exit_code == 0

View file

@ -1,7 +1,7 @@
import corrlib.toml as t import corrlib.toml as t
def test_toml_check_measurement_data() -> None: def test_toml_check_measurement_data():
measurements = { measurements = {
"a": "a":
{ {

View file

@ -1,7 +1,7 @@
import corrlib.input.sfcf as input import corrlib.input.sfcf as input
import json import json
def test_get_specs() -> None: def test_get_specs():
parameters = { parameters = {
'crr': [ 'crr': [
'f_P', 'f_A' 'f_P', 'f_A'

View file

@ -1,23 +1,22 @@
import corrlib.initialization as init import corrlib.initialization as init
import os import os
import sqlite3 as sql import sqlite3 as sql
from pathlib import Path
def test_init_folders(tmp_path: Path) -> None: def test_init_folders(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path)) init.create(str(dataset_path))
assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db")) assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_folders_no_tracker(tmp_path: Path) -> None: def test_init_folders_no_tracker(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path), tracker="None") init.create(str(dataset_path), tracker="None")
assert os.path.exists(str(dataset_path)) assert os.path.exists(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db")) assert os.path.exists(str(dataset_path / "backlogger.db"))
def test_init_config(tmp_path: Path) -> None: def test_init_config(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path), tracker="None") init.create(str(dataset_path), tracker="None")
config_path = dataset_path / ".corrlib" config_path = dataset_path / ".corrlib"
@ -35,7 +34,7 @@ def test_init_config(tmp_path: Path) -> None:
assert config.get("paths", "import_scripts_path") == "import_scripts" assert config.get("paths", "import_scripts_path") == "import_scripts"
def test_init_db(tmp_path: Path) -> None: def test_init_db(tmp_path):
dataset_path = tmp_path / "test_dataset" dataset_path = tmp_path / "test_dataset"
init.create(str(dataset_path)) init.create(str(dataset_path))
assert os.path.exists(str(dataset_path / "backlogger.db")) assert os.path.exists(str(dataset_path / "backlogger.db"))

View file

@ -3,29 +3,29 @@
from corrlib import tools as tl from corrlib import tools as tl
def test_m2k() -> None: def test_m2k():
for m in [0.1, 0.5, 1.0]: for m in [0.1, 0.5, 1.0]:
expected_k = 1 / (2 * m + 8) expected_k = 1 / (2 * m + 8)
assert tl.m2k(m) == expected_k assert tl.m2k(m) == expected_k
def test_k2m() -> None: def test_k2m():
for m in [0.1, 0.5, 1.0]: for m in [0.1, 0.5, 1.0]:
assert tl.k2m(m) == (1/(2*m))-4 assert tl.k2m(m) == (1/(2*m))-4
def test_k2m_m2k() -> None: def test_k2m_m2k():
for m in [0.1, 0.5, 1.0]: for m in [0.1, 0.5, 1.0]:
k = tl.m2k(m) k = tl.m2k(m)
m_converted = tl.k2m(k) m_converted = tl.k2m(k)
assert abs(m - m_converted) < 1e-9 assert abs(m - m_converted) < 1e-9
def test_str2list() -> None: def test_str2list():
assert tl.str2list("a,b,c") == ["a", "b", "c"] assert tl.str2list("a,b,c") == ["a", "b", "c"]
assert tl.str2list("1,2,3") == ["1", "2", "3"] assert tl.str2list("1,2,3") == ["1", "2", "3"]
def test_list2str() -> None: def test_list2str():
assert tl.list2str(["a", "b", "c"]) == "a,b,c" assert tl.list2str(["a", "b", "c"]) == "a,b,c"
assert tl.list2str(["1", "2", "3"]) == "1,2,3" assert tl.list2str(["1", "2", "3"]) == "1,2,3"