corrlib/corrlib/find.py
Justus Kuhlmann 4631769e81
Some checks failed
Mypy / mypy (push) Failing after 2s
Pytest / pytest (3.12) (push) Failing after 2s
Ruff / ruff (push) Failing after 2s
Mypy / mypy (pull_request) Failing after 2s
Pytest / pytest (3.12) (pull_request) Failing after 2s
Pytest / pytest (3.13) (pull_request) Failing after 1s
Pytest / pytest (3.14) (pull_request) Failing after 2s
Pytest / pytest (3.13) (push) Failing after 2s
Pytest / pytest (3.14) (push) Failing after 2s
Ruff / ruff (pull_request) Failing after 2s
finish a first pitch of docstrings
2026-02-12 18:39:04 +01:00

292 lines
11 KiB
Python

import sqlite3
import os
import json
import pandas as pd
import numpy as np
from .input.implementations import codes
from .tools import k2m, get_db_file
from .tracker import get
from typing import Any, Optional
def _project_lookup_by_alias(db: str, alias: str) -> str:
"""
Lookup a projects UUID by its (human-readable) alias.
Parameters
----------
db: str
The database to look up the project.
alias: str
The alias to look up.
Returns
-------
uuid: str
The UUID of the project with the given alias.
"""
conn = sqlite3.connect(db)
c = conn.cursor()
c.execute(f"SELECT * FROM 'projects' WHERE alias = '{alias}'")
results = c.fetchall()
conn.close()
if len(results)>1:
print("Error: multiple projects found with alias " + alias)
elif len(results) == 0:
raise Exception("Error: no project found with alias " + alias)
return str(results[0][0])
def _project_lookup_by_id(db: str, uuid: str) -> list[tuple[str, str]]:
"""
Return the project information available in the database by UUID.
Parameters
----------
db: str
The database to look up the project.
uuid: str
The uuid of the project in question.
Returns
-------
results: list
The row of the project in the database.
"""
conn = sqlite3.connect(db)
c = conn.cursor()
c.execute(f"SELECT * FROM 'projects' WHERE id = '{uuid}'")
results = c.fetchall()
conn.close()
return results
def _db_lookup(db: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None,
created_before: Optional[str]=None, created_after: Optional[Any]=None, updated_before: Optional[Any]=None, updated_after: Optional[Any]=None) -> pd.DataFrame:
"""
Look up a correlator record in the database by the data given to the method.
Parameters
----------
db: str
The database to look up the record.
ensemble: str
The ensemble the record is associated with.
correlator_name: str
The name of the correlator in question.
code: str
The name of the code which was used to calculate the correlator.
project: str, optional
The UUID of the project the correlator was calculated in.
parameters: str, optional
A dictionary holding the exact parameters for the measurement that are held in the database.
created_before: str, optional
Timestamp string before which the meaurement has been created.
created_after: str, optional
Timestamp string after which the meaurement has been created.
updated_before: str, optional
Timestamp string before which the meaurement has been updated.
updated_after: str, optional
Timestamp string after which the meaurement has been updated.
Returns
-------
results: pd.DataFrame
A pandas DataFrame holding the information received form the DB query.
"""
project_str = project
search_expr = f"SELECT * FROM 'backlogs' WHERE name = '{correlator_name}' AND ensemble = '{ensemble}'"
if project:
search_expr += f" AND project = '{project_str}'"
if code:
search_expr += f" AND code = '{code}'"
if parameters:
search_expr += f" AND parameters = '{parameters}'"
if created_before:
search_expr += f" AND created_at < '{created_before}'"
if created_after:
search_expr += f" AND created_at > '{created_after}'"
if updated_before:
search_expr += f" AND updated_at < '{updated_before}'"
if updated_after:
search_expr += f" AND updated_at > '{updated_after}'"
conn = sqlite3.connect(db)
results = pd.read_sql(search_expr, conn)
conn.close()
return results
def sfcf_filter(results: pd.DataFrame, **kwargs: Any) -> pd.DataFrame:
"""
Filter method for the Database entries holding SFCF calculations.
Parameters
----------
results: pd.DataFrame
The unfiltered pandas DataFrame holding the entries from the database.
offset: list[float], optional
quark_kappas: list[float]
quarks_masses: list[float]
qk1: float, optional
Mass parameter $\kappa_1$ of the first quark.
qk2: float, optional
Mass parameter $\kappa_2$ of the first quark.
qm1: float, optional
Bare quak mass $m_1$ of the first quark.
qm2: float, optional
Bare quak mass $m_1$ of the first quark.
quarks_thetas: list[list[float]], optional
wf1: optional
wf2: optional
Results
-------
results: pd.DataFrame
The filtered DataFrame, only holding the records that fit to the parameters given.
"""
drops = []
for ind in range(len(results)):
result = results.iloc[ind]
param = json.loads(result['parameters'])
if 'offset' in kwargs:
if kwargs.get('offset') != param['offset']:
drops.append(ind)
continue
if 'quark_kappas' in kwargs:
kappas = kwargs['quark_kappas']
if (not np.isclose(kappas[0], param['quarks'][0]['mass']) or not np.isclose(kappas[1], param['quarks'][1]['mass'])):
drops.append(ind)
continue
if 'quark_masses' in kwargs:
masses = kwargs['quark_masses']
if (not np.isclose(masses[0], k2m(param['quarks'][0]['mass'])) or not np.isclose(masses[1], k2m(param['quarks'][1]['mass']))):
drops.append(ind)
continue
if 'qk1' in kwargs:
quark_kappa1 = kwargs['qk1']
if not isinstance(quark_kappa1, list):
if (not np.isclose(quark_kappa1, param['quarks'][0]['mass'])):
drops.append(ind)
continue
else:
if len(quark_kappa1) == 2:
if (quark_kappa1[0] > param['quarks'][0]['mass']) or (quark_kappa1[1] < param['quarks'][0]['mass']):
drops.append(ind)
continue
if 'qk2' in kwargs:
quark_kappa2 = kwargs['qk2']
if not isinstance(quark_kappa2, list):
if (not np.isclose(quark_kappa2, param['quarks'][1]['mass'])):
drops.append(ind)
continue
else:
if len(quark_kappa2) == 2:
if (quark_kappa2[0] > param['quarks'][1]['mass']) or (quark_kappa2[1] < param['quarks'][1]['mass']):
drops.append(ind)
continue
if 'qm1' in kwargs:
quark_mass1 = kwargs['qm1']
if not isinstance(quark_mass1, list):
if (not np.isclose(quark_mass1, k2m(param['quarks'][0]['mass']))):
drops.append(ind)
continue
else:
if len(quark_mass1) == 2:
if (quark_mass1[0] > k2m(param['quarks'][0]['mass'])) or (quark_mass1[1] < k2m(param['quarks'][0]['mass'])):
drops.append(ind)
continue
if 'qm2' in kwargs:
quark_mass2 = kwargs['qm2']
if not isinstance(quark_mass2, list):
if (not np.isclose(quark_mass2, k2m(param['quarks'][1]['mass']))):
drops.append(ind)
continue
else:
if len(quark_mass2) == 2:
if (quark_mass2[0] > k2m(param['quarks'][1]['mass'])) or (quark_mass2[1] < k2m(param['quarks'][1]['mass'])):
drops.append(ind)
continue
if 'quark_thetas' in kwargs:
quark_thetas = kwargs['quark_thetas']
if (quark_thetas[0] != param['quarks'][0]['thetas'] and quark_thetas[1] != param['quarks'][1]['thetas']) or (quark_thetas[0] != param['quarks'][1]['thetas'] and quark_thetas[1] != param['quarks'][0]['thetas']):
drops.append(ind)
continue
# careful, this is not save, when multiple contributions are present!
if 'wf1' in kwargs:
wf1 = kwargs['wf1']
if not (np.isclose(wf1[0][0], param['wf1'][0][0], 1e-8) and np.isclose(wf1[0][1][0], param['wf1'][0][1][0], 1e-8) and np.isclose(wf1[0][1][1], param['wf1'][0][1][1], 1e-8)):
drops.append(ind)
continue
if 'wf2' in kwargs:
wf2 = kwargs['wf2']
if not (np.isclose(wf2[0][0], param['wf2'][0][0], 1e-8) and np.isclose(wf2[0][1][0], param['wf2'][0][1][0], 1e-8) and np.isclose(wf2[0][1][1], param['wf2'][0][1][1], 1e-8)):
drops.append(ind)
continue
return results.drop(drops)
def find_record(path: str, ensemble: str, correlator_name: str, code: str, project: Optional[str]=None, parameters: Optional[str]=None,
created_before: Optional[str]=None, created_after: Optional[str]=None, updated_before: Optional[str]=None, updated_after: Optional[str]=None, revision: Optional[str]=None, **kwargs: Any) -> pd.DataFrame:
db_file = get_db_file(path)
db = os.path.join(path, db_file)
if code not in codes:
raise ValueError("Code " + code + "unknown, take one of the following:" + ", ".join(codes))
get(path, db_file)
results = _db_lookup(db, ensemble, correlator_name,code, project, parameters=parameters, created_before=created_before, created_after=created_after, updated_before=updated_before, updated_after=updated_after)
if code == "sfcf":
results = sfcf_filter(results, **kwargs)
elif code == "openQCD":
pass
else:
raise Exception
print("Found " + str(len(results)) + " result" + ("s" if len(results)>1 else ""))
return results.reset_index()
def find_project(path: str, name: str) -> str:
"""
Find a project by it's human readable name.
Parameters
----------
path: str
The path of the library.
name: str
The name of the project to look for in the library.
Returns
-------
uuid: str
The uuid of the project in question.
"""
db_file = get_db_file(path)
get(path, db_file)
return _project_lookup_by_alias(os.path.join(path, db_file), name)
def list_projects(path: str) -> list[tuple[str, str]]:
"""
List all projects known to the library.
Parameters
----------
path: str
The path of the library.
Returns
-------
results: list[Any]
The projects known to the library.
"""
db_file = get_db_file(path)
get(path, db_file)
conn = sqlite3.connect(os.path.join(path, db_file))
c = conn.cursor()
c.execute("SELECT id,aliases FROM projects")
results = c.fetchall()
conn.close()
return results