Initial public release

fjosw 2020-10-13 16:53:00 +02:00
commit d9b2077d2c
24 changed files with 6794 additions and 0 deletions

pyerrors/__init__.py Normal file

@@ -0,0 +1,5 @@
from .pyerrors import *
from . import fits
from . import linalg
from . import misc
from . import mpm

pyerrors/fits.py Normal file

@@ -0,0 +1,730 @@
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import autograd.numpy as anp
import scipy.optimize
import scipy.stats
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy.odr import ODR, Model, RealData
import iminuit
from autograd import jacobian
from autograd import elementwise_grad as egrad
from .pyerrors import Obs, derived_observable, covariance, pseudo_Obs
def standard_fit(x, y, func, silent=False, **kwargs):
"""Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
x has to be a list of floats.
y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit.
func has to be of the form
def func(a, x):
return a[0] + a[1] * x + a[2] * anp.sinh(x)
For multiple x values func can be of the form
def func(a, x):
(x1, x2) = x
return a[0] * x1 ** 2 + a[1] * x2
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
will not work.
Keyword arguments
-----------------
dict_output -- If true, the output is a dictionary containing all relevant
data instead of just a list of the fit parameters.
silent -- If true all output to the console is omitted (default False).
initial_guess -- can provide an initial guess for the input parameters. Relevant for
non-linear fits with many parameters.
method -- can be used to choose an alternative method for the minimization of chisquare.
The possible methods are the ones which can be used for scipy.optimize.minimize and
migrad of iminuit. If no method is specified, Levenberg-Marquardt is used.
Reliable alternatives are migrad, Powell and Nelder-Mead.
resplot -- If true, a plot which displays fit, data and residuals is generated (default False).
qqplot -- If true, a quantile-quantile plot of the fit result is generated (default False).
expected_chisquare -- If true prints the expected chisquare which is
corrected by effects caused by correlated input data.
This can take a while as the full correlation matrix
has to be calculated (default False).
"""
result_dict = {}
result_dict['fit_function'] = func
x = np.asarray(x)
if x.shape[-1] != len(y):
raise Exception('x and y input have to have the same length')
if len(x.shape) > 2:
raise Exception('Unknown format for x values')
if not callable(func):
raise TypeError('func has to be a function.')
for i in range(25):
try:
func(np.arange(i), x.T[0])
except Exception:
pass
else:
break
n_parms = i
if not silent:
print('Fit with', n_parms, 'parameters')
y_f = [o.value for o in y]
dy_f = [o.dvalue for o in y]
if np.any(np.asarray(dy_f) <= 0.0):
raise Exception('No y errors available, run the gamma method first.')
if 'initial_guess' in kwargs:
x0 = kwargs.get('initial_guess')
if len(x0) != n_parms:
raise Exception('Initial guess does not have the correct length.')
else:
x0 = [0.1] * n_parms
def chisqfunc(p):
model = func(p, x)
chisq = anp.sum(((y_f - model) / dy_f) ** 2)
return chisq
if 'method' in kwargs:
result_dict['method'] = kwargs.get('method')
if not silent:
print('Method:', kwargs.get('method'))
if kwargs.get('method') == 'migrad':
fit_result = iminuit.minimize(chisqfunc, x0)
fit_result = iminuit.minimize(chisqfunc, fit_result.x)
else:
fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'))
fit_result = scipy.optimize.minimize(chisqfunc, fit_result.x, method=kwargs.get('method'), tol=1e-12)
chisquare = fit_result.fun
else:
result_dict['method'] = 'Levenberg-Marquardt'
if not silent:
print('Method: Levenberg-Marquardt')
def chisqfunc_residuals(p):
model = func(p, x)
chisq = ((y_f - model) / dy_f)
return chisq
fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
chisquare = np.sum(fit_result.fun ** 2)
if not fit_result.success:
raise Exception('The minimization procedure did not converge.')
if x.shape[-1] - n_parms > 0:
result_dict['chisquare/d.o.f.'] = chisquare / (x.shape[-1] - n_parms)
else:
result_dict['chisquare/d.o.f.'] = float('nan')
if not silent:
print(fit_result.message)
print('chisquare/d.o.f.:', result_dict['chisquare/d.o.f.'])
if kwargs.get('expected_chisquare') is True:
W = np.diag(1 / np.asarray(dy_f))
cov = covariance_matrix(y)
A = W @ jacobian(func)(fit_result.x, x)
P_phi = A @ np.linalg.inv(A.T @ A) @ A.T
expected_chisquare = np.trace((np.identity(x.shape[-1]) - P_phi) @ W @ cov @ W)
result_dict['chisquare/expected_chisquare'] = chisquare / expected_chisquare
if not silent:
print('chisquare/expected_chisquare:',
result_dict['chisquare/expected_chisquare'])
hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(fit_result.x))
def chisqfunc_compact(d):
model = func(d[:n_parms], x)
chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
return chisq
jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((fit_result.x, y_f)))
deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
result = []
for i in range(n_parms):
result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(fit_result.x[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(y), man_grad=[0] + list(deriv[i])))
result_dict['fit_parameters'] = result
result_dict['chisquare'] = chisqfunc(fit_result.x)
result_dict['d.o.f.'] = x.shape[-1] - n_parms
if kwargs.get('resplot') is True:
residual_plot(x, y, func, result)
if kwargs.get('qqplot') is True:
qqplot(x, y, func, result)
return result_dict if kwargs.get('dict_output') else result
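# Minimal usage sketch for standard_fit (kept as a comment so the module has
# no import-time side effects); it assumes pseudo_Obs(value, dvalue, name)
# returns an Obs whose error is already estimated:
#
#   def linear(a, x):
#       return a[0] + a[1] * x
#
#   x_vals = [1.0, 2.0, 3.0, 4.0]
#   y_vals = [pseudo_Obs(0.5 + 1.1 * xv, 0.05, 'ensA') for xv in x_vals]
#   fit_parameters = standard_fit(x_vals, y_vals, linear)
#   for par in fit_parameters:
#       par.gamma_method()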
def odr_fit(x, y, func, silent=False, **kwargs):
"""Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
x has to be a list of Obs, or a tuple of lists of Obs
y has to be a list of Obs
the dvalues of the Obs are used as x- and yerror for the fit.
func has to be of the form
def func(a, x):
y = a[0] + a[1] * x + a[2] * anp.sinh(x)
return y
For multiple x values func can be of the form
def func(a, x):
(x1, x2) = x
return a[0] * x1 ** 2 + a[1] * x2
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
will not work.
Based on the orthogonal distance regression module of scipy
Keyword arguments
-----------------
dict_output -- If true, the output is a dictionary containing all relevant
data instead of just a list of the fit parameters.
silent -- If true all output to the console is omitted (default False).
initial_guess -- can provide an initial guess for the input parameters. Relevant for non-linear
fits with many parameters.
expected_chisquare -- If true prints the expected chisquare which is
corrected by effects caused by correlated input data.
This can take a while as the full correlation matrix
has to be calculated (default False).
"""
result_dict = {}
result_dict['fit_function'] = func
x = np.array(x)
x_shape = x.shape
if not callable(func):
raise TypeError('func has to be a function.')
for i in range(25):
try:
func(np.arange(i), x.T[0])
except Exception:
pass
else:
break
n_parms = i
if not silent:
print('Fit with', n_parms, 'parameters')
x_f = np.vectorize(lambda o: o.value)(x)
dx_f = np.vectorize(lambda o: o.dvalue)(x)
y_f = np.array([o.value for o in y])
dy_f = np.array([o.dvalue for o in y])
if np.any(np.asarray(dx_f) <= 0.0):
raise Exception('No x errors available, run the gamma method first.')
if np.any(np.asarray(dy_f) <= 0.0):
raise Exception('No y errors available, run the gamma method first.')
if 'initial_guess' in kwargs:
x0 = kwargs.get('initial_guess')
if len(x0) != n_parms:
raise Exception('Initial guess does not have the correct length.')
else:
x0 = [1] * n_parms
data = RealData(x_f, y_f, sx=dx_f, sy=dy_f)
model = Model(func)
odr = ODR(data, model, x0, partol=np.finfo(np.float).eps)
odr.set_job(fit_type=0, deriv=1)
output = odr.run()
result_dict['residual_variance'] = output.res_var
result_dict['method'] = 'ODR'
result_dict['xplus'] = output.xplus
if not silent:
print('Method: ODR')
print(*output.stopreason)
print('Residual variance:', result_dict['residual_variance'])
if output.info > 3:
raise Exception('The minimization procedure did not converge.')
m = x_f.size
def odr_chisquare(p):
model = func(p[:n_parms], p[n_parms:].reshape(x_shape))
chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((x_f - p[n_parms:].reshape(x_shape)) / dx_f) ** 2)
return chisq
if kwargs.get('expected_chisquare') is True:
W = np.diag(1 / np.asarray(np.concatenate((dy_f.ravel(), dx_f.ravel()))))
if kwargs.get('covariance') is not None:
cov = kwargs.get('covariance')
else:
cov = covariance_matrix(np.concatenate((y, x.ravel())))
number_of_x_parameters = int(m / x_f.shape[-1])
old_jac = jacobian(func)(output.beta, output.xplus)
fused_row1 = np.concatenate((old_jac, np.concatenate((number_of_x_parameters * [np.zeros(old_jac.shape)]), axis=0)))
fused_row2 = np.concatenate((jacobian(lambda x, y : func(y, x))(output.xplus, output.beta).reshape(x_f.shape[-1], x_f.shape[-1] * number_of_x_parameters), np.identity(number_of_x_parameters * old_jac.shape[0])))
new_jac = np.concatenate((fused_row1, fused_row2), axis=1)
A = W @ new_jac
P_phi = A @ np.linalg.inv(A.T @ A) @ A.T
expected_chisquare = np.trace((np.identity(P_phi.shape[0]) - P_phi) @ W @ cov @ W)
if expected_chisquare <= 0.0:
print('Warning, negative expected_chisquare.')
expected_chisquare = np.abs(expected_chisquare)
result_dict['chisquare/expected_chisquare'] = odr_chisquare(np.concatenate((output.beta, output.xplus.ravel()))) / expected_chisquare
if not silent:
print('chisquare/expected_chisquare:',
result_dict['chisquare/expected_chisquare'])
hess_inv = np.linalg.pinv(jacobian(jacobian(odr_chisquare))(np.concatenate((output.beta, output.xplus.ravel()))))
def odr_chisquare_compact_x(d):
model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((d[n_parms + m:].reshape(x_shape) - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
return chisq
jac_jac_x = jacobian(jacobian(odr_chisquare_compact_x))(np.concatenate((output.beta, output.xplus.ravel(), x_f.ravel())))
deriv_x = -hess_inv @ jac_jac_x[:n_parms + m, n_parms + m:]
def odr_chisquare_compact_y(d):
model = func(d[:n_parms], d[n_parms:n_parms + m].reshape(x_shape))
chisq = anp.sum(((d[n_parms + m:] - model) / dy_f) ** 2) + anp.sum(((x_f - d[n_parms:n_parms + m].reshape(x_shape)) / dx_f) ** 2)
return chisq
jac_jac_y = jacobian(jacobian(odr_chisquare_compact_y))(np.concatenate((output.beta, output.xplus.ravel(), y_f)))
deriv_y = -hess_inv @ jac_jac_y[:n_parms + m, n_parms + m:]
result = []
for i in range(n_parms):
result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(output.beta[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(x.ravel()) + list(y), man_grad=[0] + list(deriv_x[i]) + list(deriv_y[i])))
result_dict['fit_parameters'] = result
result_dict['odr_chisquare'] = odr_chisquare(np.concatenate((output.beta, output.xplus.ravel())))
result_dict['d.o.f.'] = x.shape[-1] - n_parms
return result_dict if kwargs.get('dict_output') else result
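# Usage sketch for odr_fit: here the x values carry errors too, so both
# coordinates are Obs (reusing the hypothetical pseudo_Obs data and the
# linear function from the standard_fit sketch above):
#
#   x_obs = [pseudo_Obs(xv, 0.01, 'ensA') for xv in [1.0, 2.0, 3.0, 4.0]]
#   fit_parameters = odr_fit(x_obs, y_vals, linear)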
def prior_fit(x, y, func, priors, silent=False, **kwargs):
"""Performs a non-linear fit to y = func(x) with given priors and returns a list of Obs corresponding to the fit parameters.
x has to be a list of floats.
y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit.
func has to be of the form
def func(a, x):
y = a[0] + a[1] * x + a[2] * anp.sinh(x)
return y
It is important that all numpy functions refer to autograd.numpy, otherwise the differentiation
will not work.
priors has to be a list with an entry for every parameter in the fit. The entries can either be
Obs (e.g. results from a previous fit) or strings containing a value and an error formatted like
0.548(23), 500(40) or 0.5(0.4)
It is important for the subsequent error estimation that the e_tag for the gamma method is large
enough.
Keyword arguments
-----------------
dict_output -- If true, the output is a dictionary containing all relevant
data instead of just a list of the fit parameters.
silent -- If true all output to the console is omitted (default False).
initial_guess -- can provide an initial guess for the input parameters.
If no guess is provided, the prior values are used.
resplot -- if true, a plot which displays fit, data and residuals is generated (default False)
qqplot -- if true, a quantile-quantile plot of the fit result is generated (default False)
tol -- Specify the tolerance of the migrad solver (default 1e-4)
"""
result_dict = {}
result_dict['fit_function'] = func
if Obs.e_tag_global < 4:
print('WARNING: e_tag_global is smaller than 4, this can cause problems when calculating errors from fits with priors')
x = np.asarray(x)
if not callable(func):
raise TypeError('func has to be a function.')
for i in range(100):
try:
func(np.arange(i), 0)
except Exception:
pass
else:
break
n_parms = i
if n_parms != len(priors):
raise Exception('Priors does not have the correct length.')
def extract_val_and_dval(string):
split_string = string.split('(')
if '.' in split_string[0] and '.' not in split_string[1][:-1]:
factor = 10 ** -len(split_string[0].partition('.')[2])
else:
factor = 1
return float(split_string[0]), float(split_string[1][:-1]) * factor
loc_priors = []
for i_n, i_prior in enumerate(priors):
if isinstance(i_prior, Obs):
loc_priors.append(i_prior)
else:
loc_val, loc_dval = extract_val_and_dval(i_prior)
loc_priors.append(pseudo_Obs(loc_val, loc_dval, 'p' + str(i_n)))
result_dict['priors'] = loc_priors
if not silent:
print('Fit with', n_parms, 'parameters')
y_f = [o.value for o in y]
dy_f = [o.dvalue for o in y]
if np.any(np.asarray(dy_f) <= 0.0):
raise Exception('No y errors available, run the gamma method first.')
p_f = [o.value for o in loc_priors]
dp_f = [o.dvalue for o in loc_priors]
if np.any(np.asarray(dp_f) <= 0.0):
raise Exception('No prior errors available, run the gamma method first.')
if 'initial_guess' in kwargs:
x0 = kwargs.get('initial_guess')
if len(x0) != n_parms:
raise Exception('Initial guess does not have the correct length.')
else:
x0 = p_f
def chisqfunc(p):
model = func(p, x)
chisq = anp.sum(((y_f - model) / dy_f) ** 2) + anp.sum(((p_f - p) / dp_f) ** 2)
return chisq
if not silent:
print('Method: migrad')
m = iminuit.Minuit.from_array_func(chisqfunc, x0, error=np.asarray(x0) * 0.01, errordef=1, print_level=0)
if 'tol' in kwargs:
m.tol = kwargs.get('tol')
else:
m.tol = 1e-4
m.migrad()
params = np.asarray(m.values.values())
result_dict['chisquare/d.o.f.'] = m.fval / len(x)
result_dict['method'] = 'migrad'
if not silent:
print('chisquare/d.o.f.:', result_dict['chisquare/d.o.f.'])
if not m.get_fmin().is_valid:
raise Exception('The minimization procedure did not converge.')
hess_inv = np.linalg.pinv(jacobian(jacobian(chisqfunc))(params))
def chisqfunc_compact(d):
model = func(d[:n_parms], x)
chisq = anp.sum(((d[n_parms: n_parms + len(x)] - model) / dy_f) ** 2) + anp.sum(((d[n_parms + len(x):] - d[:n_parms]) / dp_f) ** 2)
return chisq
jac_jac = jacobian(jacobian(chisqfunc_compact))(np.concatenate((params, y_f, p_f)))
deriv = -hess_inv @ jac_jac[:n_parms, n_parms:]
result = []
for i in range(n_parms):
result.append(derived_observable(lambda x, **kwargs: x[0], [pseudo_Obs(params[i], 0.0, y[0].names[0], y[0].shape[y[0].names[0]])] + list(y) + list(loc_priors), man_grad=[0] + list(deriv[i])))
result_dict['fit_parameters'] = result
result_dict['chisquare'] = chisqfunc(np.asarray(params))
if kwargs.get('resplot') is True:
residual_plot(x, y, func, result)
if kwargs.get('qqplot') is True:
qqplot(x, y, func, result)
return result_dict if kwargs.get('dict_output') else result
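# Sketch for prior_fit: priors can mix Obs (e.g. parameters from an earlier
# fit) with strings in the value(error) notation from the docstring; data and
# fit function are the hypothetical ones from the sketches above:
#
#   priors = ['0.5(2)', '1.1(3)']
#   fit_parameters = prior_fit(x_vals, y_vals, linear, priors)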
def fit_lin(x, y, **kwargs):
"""Performs a linear fit to y = n + m * x and returns two Obs n, m.
y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit.
x can either be a list of floats in which case no xerror is assumed, or
a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
"""
def f(a, x):
y = a[0] + a[1] * x
return y
if all(isinstance(n, Obs) for n in x):
return odr_fit(x, y, f, **kwargs)
elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
return standard_fit(x, y, f, **kwargs)
else:
raise Exception('Unsupported types for x')
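# fit_lin dispatches on the type of x: plain floats go through standard_fit,
# Obs-valued x through odr_fit, e.g. (with the hypothetical data from above):
#
#   n, m = fit_lin([1.0, 2.0, 3.0, 4.0], y_vals)  # no x errors
#   n, m = fit_lin(x_obs, y_vals)                 # x errors propagated via ODR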
def fit_exp(data, **kwargs):
"""Fit a single exponential to a discrete time series of Obs without errors.
Keyword arguments
-----------------
shift -- specifies the absolute timeslice value of the first entry of data (default 0.0)
only important if one is interested in the matrix element, for the mass this is irrelevant.
"""
if 'shift' in kwargs:
shift = kwargs.get("shift")
else:
shift = 0
length = len(data)
xsum = 0
xsum2 = 0
ysum = 0
xysum = 0
for i in range(shift, length + shift):
xsum += i
xsum2 += i ** 2
tmp_log = np.log(np.abs(data[i - shift]))
ysum += tmp_log
xysum += i * tmp_log
res0 = -(length * xysum - xsum * ysum) / (length * xsum2 - xsum * xsum) # mass
res1 = np.exp((xsum2 * ysum - xsum * xysum) / (length * xsum2 - xsum * xsum)) # matrix element
return [res0, res1]
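# fit_exp is an errorless log-linear fit: for data[t] = A * exp(-m * t) it
# returns [m, A]. Quick plausibility check on exact float data:
#
#   data = [2.0 * np.exp(-0.3 * t) for t in range(10)]
#   mass, amplitude = fit_exp(data)  # ~0.3 and ~2.0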
def qqplot(x, o_y, func, p):
""" Generates a quantile-quantile plot of the fit result which can be used to
check if the residuals of the fit are Gaussian distributed.
"""
residuals = []
for i_x, i_y in zip(x, o_y):
residuals.append((i_y - func(p, i_x)) / i_y.dvalue)
residuals = sorted(residuals)
my_y = [o.value for o in residuals]
probplot = scipy.stats.probplot(my_y)
my_x = probplot[0][0]
fig = plt.figure(figsize=(8, 8 / 1.618))
plt.errorbar(my_x, my_y, fmt='o')
fit_start = my_x[0]
fit_stop = my_x[-1]
samples = np.arange(fit_start, fit_stop, 0.01)
plt.plot(samples, samples, 'k--', zorder=11, label='Standard normal distribution')
plt.plot(samples, probplot[1][0] * samples + probplot[1][1], zorder=10, label='Least squares fit, r=' + str(np.around(probplot[1][2], 3)))
plt.xlabel('Theoretical quantiles')
plt.ylabel('Ordered Values')
plt.legend()
plt.show()
def residual_plot(x, y, func, fit_res):
""" Generates a plot which compares the fit to the data and displays the corresponding residuals"""
xstart = x[0] - 0.5
xstop = x[-1] + 0.5
x_samples = np.arange(xstart, xstop, 0.01)
plt.figure(figsize=(8, 8 / 1.618))
gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1], wspace=0.0, hspace=0.0)
ax0 = plt.subplot(gs[0])
ax0.errorbar(x, [o.value for o in y], yerr=[o.dvalue for o in y], ls='none', fmt='o', capsize=3, markersize=5, label='Data')
ax0.plot(x_samples, func([o.value for o in fit_res], x_samples), label='Fit', zorder=10)
ax0.set_xticklabels([])
ax0.set_xlim([xstart, xstop])
ax0.legend()
residuals = (np.asarray([o.value for o in y]) - func([o.value for o in fit_res], x)) / np.asarray([o.dvalue for o in y])
ax1 = plt.subplot(gs[1])
ax1.plot(x, residuals, 'ko', ls='none', markersize=5)
ax1.tick_params(direction='out')
ax1.tick_params(axis="x", bottom=True, top=True, labelbottom=True)
ax1.axhline(y=0.0, ls='--', color='k')
ax1.fill_between(x_samples, -1.0, 1.0, alpha=0.1, facecolor='k')
ax1.set_xlim([xstart, xstop])
ax1.set_ylabel('Residuals')
plt.subplots_adjust(wspace=None, hspace=None)
plt.show()
def covariance_matrix(y):
"""Returns the covariance matrix of y."""
length = len(y)
cov = np.zeros((length, length))
for i, item in enumerate(y):
for j, jtem in enumerate(y[:i + 1]):
if i == j:
cov[i, j] = item.dvalue ** 2
else:
cov[i, j] = covariance(item, jtem)
return cov + cov.T - np.diag(np.diag(cov))
def error_band(x, func, beta):
"""Returns the error band for an array of sample values x, for given fit function func with optimized parameters beta."""
cov = covariance_matrix(beta)
if np.any(np.abs(cov - cov.T) > 1000 * np.finfo(np.float).eps):
print('Warning, Covariance matrix is not symmetric within floating point precision')
print('cov - cov.T:')
print(cov - cov.T)
deriv = []
for i, item in enumerate(x):
deriv.append(np.array(egrad(func)([o.value for o in beta], item)))
err = []
for i, item in enumerate(x):
err.append(np.sqrt(deriv[i] @ cov @ deriv[i]))
err = np.array(err)
return err
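# error_band gives the linearized 1-sigma band of a fitted function, e.g. for
# plotting (a sketch reusing the hypothetical linear fit from above; assumes
# gamma_method has been run on the parameters so their covariance exists):
#
#   x_samples = np.arange(0.5, 4.5, 0.01)
#   central = linear([o.value for o in fit_parameters], x_samples)
#   band = error_band(x_samples, linear, fit_parameters)
#   plt.fill_between(x_samples, central - band, central + band, alpha=0.3)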
def fit_general(x, y, func, silent=False, **kwargs):
"""Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
WARNING: In the current version the fits are performed with numerical derivatives.
Plausibility of the results should be checked. To control the numerical differentiation
the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
func has to be of the form
def func(a, x):
y = a[0] + a[1] * x + a[2] * np.sinh(x)
return y
y has to be a list of Obs, the dvalues of the Obs are used as yerror for the fit.
x can either be a list of floats in which case no xerror is assumed, or
a list of Obs, where the dvalues of the Obs are used as xerror for the fit.
Keyword arguments
-----------------
silent -- If true all output to the console is omitted (default False).
initial_guess -- can provide an initial guess for the input parameters. Relevant for non-linear fits
with many parameters.
"""
if not silent:
print('WARNING: This function is deprecated and will be removed in future versions.')
print('New fit functions with exact error propagation are now available as alternative.')
if not callable(func):
raise TypeError('func has to be a function.')
for i in range(10):
try:
func(np.arange(i), 0)
except Exception:
pass
else:
break
n_parms = i
if not silent:
print('Fit with', n_parms, 'parameters')
global print_output, beta0
print_output = 1
if 'initial_guess' in kwargs:
beta0 = kwargs.get('initial_guess')
if len(beta0) != n_parms:
raise Exception('Initial guess does not have the correct length.')
else:
beta0 = np.arange(n_parms)
if len(x) != len(y):
raise Exception('x and y have to have the same length')
if all(isinstance(n, Obs) for n in x):
obs = x + y
x_constants = None
xerr = [o.dvalue for o in x]
yerr = [o.dvalue for o in y]
elif all(isinstance(n, float) or isinstance(n, int) for n in x) or isinstance(x, np.ndarray):
obs = y
x_constants = x
xerr = None
yerr = [o.dvalue for o in y]
else:
raise Exception('Unsupported types for x')
def do_the_fit(obs, **kwargs):
global print_output, beta0
func = kwargs.get('function')
yerr = kwargs.get('yerr')
length = len(yerr)
xerr = kwargs.get('xerr')
if length == len(obs):
assert 'x_constants' in kwargs
data = RealData(kwargs.get('x_constants'), obs, sy=yerr)
fit_type = 2
elif length == len(obs) // 2:
data = RealData(obs[:length], obs[length:], sx=xerr, sy=yerr)
fit_type = 0
else:
raise Exception('x and y do not fit together.')
model = Model(func)
odr = ODR(data, model, beta0, partol=np.finfo(np.float).eps)
odr.set_job(fit_type=fit_type, deriv=1)
output = odr.run()
if print_output and not silent:
print(*output.stopreason)
print('chisquare/d.o.f.:', output.res_var)
print_output = 0
beta0 = output.beta
return output.beta[kwargs.get('n')]
res = []
for n in range(n_parms):
res.append(derived_observable(do_the_fit, obs, function=func, xerr=xerr, yerr=yerr, x_constants=x_constants, num_grad=True, n=n, **kwargs))
return res

pyerrors/input/__init__.py Normal file

@@ -0,0 +1,2 @@
from .input import *
from . import bdio

pyerrors/input/bdio.py Normal file

@@ -0,0 +1,628 @@
#!/usr/bin/env python
# coding: utf-8
import ctypes
import hashlib
import autograd.numpy as np # Thinly-wrapped numpy
from ..pyerrors import Obs
def read_ADerrors(file_path, bdio_path='./libbdio.so', **kwargs):
""" Extract generic MCMC data from a bdio file
read_ADerrors requires bdio to be compiled into a shared library. This can be achieved by
adding the flag -fPIC to CC and changing the all target to
all: bdio.o $(LIBDIR)
gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
cp $(BUILDDIR)/libbdio.so $(LIBDIR)/
Parameters
----------
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
"""
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
bdio_open.restype = ctypes.c_void_p
bdio_close = bdio.bdio_close
bdio_close.restype = ctypes.c_int
bdio_close.argtypes = [ctypes.c_void_p]
bdio_seek_record = bdio.bdio_seek_record
bdio_seek_record.restype = ctypes.c_int
bdio_seek_record.argtypes = [ctypes.c_void_p]
bdio_get_rlen = bdio.bdio_get_rlen
bdio_get_rlen.restype = ctypes.c_int
bdio_get_rlen.argtypes = [ctypes.c_void_p]
bdio_get_ruinfo = bdio.bdio_get_ruinfo
bdio_get_ruinfo.restype = ctypes.c_int
bdio_get_ruinfo.argtypes = [ctypes.c_void_p]
bdio_read = bdio.bdio_read
bdio_read.restype = ctypes.c_size_t
bdio_read.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_void_p]
bdio_read_f64 = bdio.bdio_read_f64
bdio_read_f64.restype = ctypes.c_size_t
bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
bdio_read_int32 = bdio.bdio_read_int32
bdio_read_int32.restype = ctypes.c_size_t
bdio_read_int32.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
b_path = file_path.encode('utf-8')
read = 'r'
b_read = read.encode('utf-8')
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), None)
return_list = []
print('Reading of bdio file started')
while True:
record = bdio_seek_record(fbdio)
ruinfo = bdio_get_ruinfo(fbdio)
if ruinfo == 7:
print('MD5sum found') # For now we just ignore these entries and do not perform any checks on them
continue
if ruinfo < 0:
# EOF reached
break
rlen = bdio_get_rlen(fbdio)
def read_c_double():
d_buf = ctypes.c_double
pd_buf = d_buf()
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iread = bdio_read_f64(ppd_buf, ctypes.c_size_t(8), ctypes.c_void_p(fbdio))
return pd_buf.value
mean = read_c_double()
print('mean', mean)
def read_c_size_t():
d_buf = ctypes.c_size_t
pd_buf = d_buf()
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iread = bdio_read_int32(ppd_buf, ctypes.c_size_t(4), ctypes.c_void_p(fbdio))
return pd_buf.value
neid = read_c_size_t()
print('neid', neid)
ndata = []
for index in range(neid):
ndata.append(read_c_size_t())
print('ndata', ndata)
nrep = []
for index in range(neid):
nrep.append(read_c_size_t())
print('nrep', nrep)
vrep = []
for index in range(neid):
vrep.append([])
for jndex in range(nrep[index]):
vrep[-1].append(read_c_size_t())
print('vrep', vrep)
ids = []
for index in range(neid):
ids.append(read_c_size_t())
print('ids', ids)
nt = []
for index in range(neid):
nt.append(read_c_size_t())
print('nt', nt)
zero = []
for index in range(neid):
zero.append(read_c_double())
print('zero', zero)
four = []
for index in range(neid):
four.append(read_c_double())
print('four', four)
d_buf = ctypes.c_double * np.sum(ndata)
pd_buf = d_buf()
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iread = bdio_read_f64(ppd_buf, ctypes.c_size_t(8 * np.sum(ndata)), ctypes.c_void_p(fbdio))
delta = pd_buf[:]
samples = np.split(np.asarray(delta) + mean, np.cumsum([a for su in vrep for a in su])[:-1])
no_reps = [len(o) for o in vrep]
assert len(ids) == len(no_reps)
tmp_names = []
ens_length = max([len(str(o)) for o in ids])
for loc_id, reps in zip(ids, no_reps):
for index in range(reps):
missing_chars = ens_length - len(str(loc_id))
tmp_names.append(str(loc_id) + ' ' * missing_chars + 'r' + '{0:03d}'.format(index))
return_list.append(Obs(samples, tmp_names))
bdio_close(fbdio)
print()
print(len(return_list), 'observable(s) extracted.')
return return_list
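# Reading sketch, assuming libbdio.so was compiled as described in the
# docstring; 'measurements.bdio' is a hypothetical file name:
#
#   obs_list = read_ADerrors('measurements.bdio', bdio_path='./libbdio.so')
#   for obs in obs_list:
#       obs.gamma_method()
#       print(obs.value, obs.dvalue)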
def write_ADerrors(obs_list, file_path, bdio_path='./libbdio.so', **kwargs):
""" Write Obs to a bdio file according to ADerrors conventions
write_ADerrors requires bdio to be compiled into a shared library. This can be achieved by
adding the flag -fPIC to CC and changing the all target to
all: bdio.o $(LIBDIR)
gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
cp $(BUILDDIR)/libbdio.so $(LIBDIR)/
Parameters
----------
obs_list -- list of Obs that should be written to the file
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
"""
for obs in obs_list:
if not obs.e_names:
raise Exception('Run the gamma method first for all obs.')
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
bdio_open.restype = ctypes.c_void_p
bdio_close = bdio.bdio_close
bdio_close.restype = ctypes.c_int
bdio_close.argtypes = [ctypes.c_void_p]
bdio_start_record = bdio.bdio_start_record
bdio_start_record.restype = ctypes.c_int
bdio_start_record.argtypes = [ctypes.c_size_t, ctypes.c_size_t, ctypes.c_void_p]
bdio_flush_record = bdio.bdio_flush_record
bdio_flush_record.restype = ctypes.c_int
bdio_flush_record.argtypes = [ctypes.c_void_p]
bdio_write_f64 = bdio.bdio_write_f64
bdio_write_f64.restype = ctypes.c_size_t
bdio_write_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
bdio_write_int32 = bdio.bdio_write_int32
bdio_write_int32.restype = ctypes.c_size_t
bdio_write_int32.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
b_path = file_path.encode('utf-8')
write = 'w'
b_write = write.encode('utf-8')
form = 'pyerrors ADerror export'
b_form = form.encode('utf-8')
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_write), b_form)
for obs in obs_list:
mean = obs.value
neid = len(obs.e_names)
vrep = [[obs.shape[o] for o in sl] for sl in list(obs.e_content.values())]
vrep_write = [item for sublist in vrep for item in sublist]
ndata = [np.sum(o) for o in vrep]
nrep = [len(o) for o in vrep]
print('ndata', ndata)
print('nrep', nrep)
print('vrep', vrep)
keys = list(obs.e_content.keys())
ids = []
for key in keys:
try: # Try to convert key to integer
ids.append(int(key))
except Exception: # If not possible construct a hash
ids.append(int(hashlib.sha256(key.encode('utf-8')).hexdigest(), 16) % 10 ** 8)
print('ids', ids)
nt = []
for e, e_name in enumerate(obs.e_names):
r_length = []
for r_name in obs.e_content[e_name]:
r_length.append(len(obs.deltas[r_name]))
#e_N = np.sum(r_length)
nt.append(max(r_length) // 2)
print('nt', nt)
zero = neid * [0.0]
four = neid * [4.0]
print('zero', zero)
print('four', four)
delta = np.concatenate([item for sublist in [[obs.deltas[o] for o in sl] for sl in list(obs.e_content.values())] for item in sublist])
bdio_start_record(0x00, 8, fbdio)
def write_c_double(double):
pd_buf = ctypes.c_double(double)
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iwrite = bdio_write_f64(ppd_buf, ctypes.c_size_t(8), ctypes.c_void_p(fbdio))
def write_c_size_t(int32):
pd_buf = ctypes.c_size_t(int32)
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iwrite = bdio_write_int32(ppd_buf, ctypes.c_size_t(4), ctypes.c_void_p(fbdio))
write_c_double(obs.value)
write_c_size_t(neid)
for element in ndata:
write_c_size_t(element)
for element in nrep:
write_c_size_t(element)
for element in vrep_write:
write_c_size_t(element)
for element in ids:
write_c_size_t(element)
for element in nt:
write_c_size_t(element)
for element in zero:
write_c_double(element)
for element in four:
write_c_double(element)
for element in delta:
write_c_double(element)
bdio_close(fbdio)
return 0
def _get_kwd(string, key):
return (string.split(key, 1)[1]).split(" ", 1)[0]
def _get_corr_name(string, key):
return (string.split(key, 1)[1]).split(' NDIM=', 1)[0]
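# Both helpers pull whitespace-delimited metadata out of bdio info strings,
# e.g. (hypothetical strings):
#
#   _get_kwd('ENSEMBLE=A654 L0=48 L1=24', 'L0=')                # -> '48'
#   _get_corr_name('CORR_NAME=f_A NDIM=2 D0=64', 'CORR_NAME=')  # -> 'f_A'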
def read_mesons(file_path, bdio_path='./libbdio.so', **kwargs):
""" Extract mesons data from a bdio file and return it as a dictionary
The dictionary can be accessed with a tuple consisting of (type, source_position, kappa1, kappa2)
read_mesons requires bdio to be compiled into a shared library. This can be achieved by
adding the flag -fPIC to CC and changing the all target to
all: bdio.o $(LIBDIR)
gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
cp $(BUILDDIR)/libbdio.so $(LIBDIR)/
Parameters
----------
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
stop -- stops reading at given configuration number (default None)
alternative_ensemble_name -- Manually overwrite ensemble name
"""
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
bdio_open.restype = ctypes.c_void_p
bdio_close = bdio.bdio_close
bdio_close.restype = ctypes.c_int
bdio_close.argtypes = [ctypes.c_void_p]
bdio_seek_record = bdio.bdio_seek_record
bdio_seek_record.restype = ctypes.c_int
bdio_seek_record.argtypes = [ctypes.c_void_p]
bdio_get_rlen = bdio.bdio_get_rlen
bdio_get_rlen.restype = ctypes.c_int
bdio_get_rlen.argtypes = [ctypes.c_void_p]
bdio_get_ruinfo = bdio.bdio_get_ruinfo
bdio_get_ruinfo.restype = ctypes.c_int
bdio_get_ruinfo.argtypes = [ctypes.c_void_p]
bdio_read = bdio.bdio_read
bdio_read.restype = ctypes.c_size_t
bdio_read.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_void_p]
bdio_read_f64 = bdio.bdio_read_f64
bdio_read_f64.restype = ctypes.c_size_t
bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
b_path = file_path.encode('utf-8')
read = 'r'
b_read = read.encode('utf-8')
form = 'Generic Correlator Format 1.0'
b_form = form.encode('utf-8')
ensemble_name = ''
volume = [] # lattice volume
boundary_conditions = []
corr_name = [] # Contains correlator names
corr_type = [] # Contains correlator data type (important for reading out numerical data)
corr_props = [] # Contains propagator types (Component of corr_kappa)
d0 = 0 # tvals
d1 = 0 # nnoise
prop_kappa = [] # Contains propagator kappas (Component of corr_kappa)
prop_source = [] # Contains propagator source positions
# Check noise type for multiple replica?
cnfg_no = -1
corr_no = -1
data = []
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
print('Reading of bdio file started')
while True:
record = bdio_seek_record(fbdio)
ruinfo = bdio_get_ruinfo(fbdio)
if ruinfo < 0:
# EOF reached
break
rlen = bdio_get_rlen(fbdio)
if ruinfo == 5:
d_buf = ctypes.c_double * (2 + d0 * d1 * 2)
pd_buf = d_buf()
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iread = bdio_read_f64(ppd_buf, ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
if corr_type[corr_no] == 'complex':
tmp_mean = np.mean(np.asarray(np.split(np.asarray(pd_buf[2 + 2 * d1:-2 * d1:2]), d0 - 2)), axis=1)
else:
tmp_mean = np.mean(np.asarray(np.split(np.asarray(pd_buf[2 + d1:-d0 * d1 - d1]), d0 - 2)), axis=1)
data[corr_no].append(tmp_mean)
corr_no += 1
else:
alt_buf = ctypes.create_string_buffer(1024)
palt_buf = ctypes.c_char_p(ctypes.addressof(alt_buf))
iread = bdio_read(palt_buf, ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
if rlen != iread:
print('Error: number of read bytes does not match record length')
for i, item in enumerate(alt_buf):
if item == b'\x00':
alt_buf[i] = b' '
tmp_string = (alt_buf[:].decode("utf-8")).rstrip()
if ruinfo == 0:
ensemble_name = _get_kwd(tmp_string, 'ENSEMBLE=')
volume.append(int(_get_kwd(tmp_string, 'L0=')))
volume.append(int(_get_kwd(tmp_string, 'L1=')))
volume.append(int(_get_kwd(tmp_string, 'L2=')))
volume.append(int(_get_kwd(tmp_string, 'L3=')))
boundary_conditions.append(_get_kwd(tmp_string, 'BC0='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC1='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC2='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC3='))
if ruinfo == 1:
corr_name.append(_get_corr_name(tmp_string, 'CORR_NAME='))
corr_type.append(_get_kwd(tmp_string, 'DATATYPE='))
corr_props.append([_get_kwd(tmp_string, 'PROP0='), _get_kwd(tmp_string, 'PROP1=')])
if d0 == 0:
d0 = int(_get_kwd(tmp_string, 'D0='))
else:
if d0 != int(_get_kwd(tmp_string, 'D0=')):
print('Error: Varying number of time values')
if d1 == 0:
d1 = int(_get_kwd(tmp_string, 'D1='))
else:
if d1 != int(_get_kwd(tmp_string, 'D1=')):
print('Error: Varying number of random sources')
if ruinfo == 2:
prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))
prop_source.append(_get_kwd(tmp_string, 'x0='))
if ruinfo == 4:
if 'stop' in kwargs:
if cnfg_no >= kwargs.get('stop') - 1:
break
cnfg_no += 1
print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
if cnfg_no == 0:
no_corrs = len(corr_name)
data = []
for c in range(no_corrs):
data.append([])
corr_no = 0
bdio_close(fbdio)
print('\nEnsemble: ', ensemble_name)
if 'alternative_ensemble_name' in kwargs:
ensemble_name = kwargs.get('alternative_ensemble_name')
print('Ensemble name overwritten to', ensemble_name)
print('Lattice volume: ', volume)
print('Boundary conditions: ', boundary_conditions)
print('Number of time values: ', d0)
print('Number of random sources: ', d1)
print('Number of corrs: ', len(corr_name))
print('Number of configurations: ', cnfg_no + 1)
corr_kappa = [] # Contains kappa values for both propagators of given correlation function
corr_source = []
for item in corr_props:
corr_kappa.append([float(prop_kappa[int(item[0])]), float(prop_kappa[int(item[1])])])
if prop_source[int(item[0])] != prop_source[int(item[1])]:
raise Exception('Source positions do not match for correlator ' + str(item))
else:
corr_source.append(int(prop_source[int(item[0])]))
result = {}
for c in range(no_corrs):
tmp_corr = []
for t in range(d0 - 2):
tmp_corr.append(Obs([np.asarray(data[c])[:, t]], [ensemble_name]))
result[(corr_name[c], corr_source[c]) + tuple(sorted(corr_kappa[c]))] = tmp_corr
# Check that all data entries have the same number of configurations
if len(set([o[0].N for o in list(result.values())])) != 1:
raise Exception('Error: Not all correlators have the same number of configurations. bdio file is possibly corrupted.')
return result
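# Access sketch: read_mesons returns a dictionary keyed by
# (correlator name, source position, kappa1, kappa2); file name and kappa
# values below are hypothetical:
#
#   correlators = read_mesons('corrs.bdio', bdio_path='./libbdio.so')
#   f_A = correlators[('f_A', 0, 0.13729, 0.13729)]  # list of Obs, one per timeslice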
def read_dSdm(file_path, bdio_path='./libbdio.so', **kwargs):
""" Extract dSdm data from a bdio file and return it as a dictionary
The dictionary can be accessed with a tuple consisting of (type, kappa)
read_dSdm requires bdio to be compiled into a shared library. This can be achieved by
adding the flag -fPIC to CC and changing the all target to
all: bdio.o $(LIBDIR)
gcc -shared -Wl,-soname,libbdio.so -o $(BUILDDIR)/libbdio.so $(BUILDDIR)/bdio.o
cp $(BUILDDIR)/libbdio.so $(LIBDIR)/
Parameters
----------
file_path -- path to the bdio file
bdio_path -- path to the shared bdio library libbdio.so (default ./libbdio.so)
stop -- stops reading at given configuration number (default None)
"""
bdio = ctypes.cdll.LoadLibrary(bdio_path)
bdio_open = bdio.bdio_open
bdio_open.restype = ctypes.c_void_p
bdio_close = bdio.bdio_close
bdio_close.restype = ctypes.c_int
bdio_close.argtypes = [ctypes.c_void_p]
bdio_seek_record = bdio.bdio_seek_record
bdio_seek_record.restype = ctypes.c_int
bdio_seek_record.argtypes = [ctypes.c_void_p]
bdio_get_rlen = bdio.bdio_get_rlen
bdio_get_rlen.restype = ctypes.c_int
bdio_get_rlen.argtypes = [ctypes.c_void_p]
bdio_get_ruinfo = bdio.bdio_get_ruinfo
bdio_get_ruinfo.restype = ctypes.c_int
bdio_get_ruinfo.argtypes = [ctypes.c_void_p]
bdio_read = bdio.bdio_read
bdio_read.restype = ctypes.c_size_t
bdio_read.argtypes = [ctypes.c_char_p, ctypes.c_size_t, ctypes.c_void_p]
bdio_read_f64 = bdio.bdio_read_f64
bdio_read_f64.restype = ctypes.c_size_t
bdio_read_f64.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_void_p]
b_path = file_path.encode('utf-8')
read = 'r'
b_read = read.encode('utf-8')
form = 'Generic Correlator Format 1.0'
b_form = form.encode('utf-8')
ensemble_name = ''
volume = [] # lattice volume
boundary_conditions = []
corr_name = [] # Contains correlator names
corr_type = [] # Contains correlator data type (important for reading out numerical data)
corr_props = [] # Contains propagator types (Component of corr_kappa)
d0 = 0 # tvals
d1 = 0 # nnoise
prop_kappa = [] # Contains propagator kappas (Component of corr_kappa)
# Check noise type for multiple replica?
cnfg_no = -1
corr_no = -1
data = []
fbdio = bdio_open(ctypes.c_char_p(b_path), ctypes.c_char_p(b_read), ctypes.c_char_p(b_form))
print('Reading of bdio file started')
while True:
record = bdio_seek_record(fbdio)
ruinfo = bdio_get_ruinfo(fbdio)
if ruinfo < 0:
# EOF reached
break
rlen = bdio_get_rlen(fbdio)
if ruinfo == 5:
d_buf = ctypes.c_double * (2 + d0)
pd_buf = d_buf()
ppd_buf = ctypes.c_void_p(ctypes.addressof(pd_buf))
iread = bdio_read_f64(ppd_buf, ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
tmp_mean = np.mean(np.asarray(pd_buf[2:]))
data[corr_no].append(tmp_mean)
corr_no += 1
else:
alt_buf = ctypes.create_string_buffer(1024)
palt_buf = ctypes.c_char_p(ctypes.addressof(alt_buf))
iread = bdio_read(palt_buf, ctypes.c_size_t(rlen), ctypes.c_void_p(fbdio))
if rlen != iread:
print('Error: number of read bytes does not match record length')
for i, item in enumerate(alt_buf):
if item == b'\x00':
alt_buf[i] = b' '
tmp_string = (alt_buf[:].decode("utf-8")).rstrip()
if ruinfo == 0:
creator = _get_kwd(tmp_string, 'CREATOR=')
ensemble_name = _get_kwd(tmp_string, 'ENSEMBLE=')
volume.append(int(_get_kwd(tmp_string, 'L0=')))
volume.append(int(_get_kwd(tmp_string, 'L1=')))
volume.append(int(_get_kwd(tmp_string, 'L2=')))
volume.append(int(_get_kwd(tmp_string, 'L3=')))
boundary_conditions.append(_get_kwd(tmp_string, 'BC0='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC1='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC2='))
boundary_conditions.append(_get_kwd(tmp_string, 'BC3='))
if ruinfo == 1:
corr_name.append(_get_corr_name(tmp_string, 'CORR_NAME='))
corr_type.append(_get_kwd(tmp_string, 'DATATYPE='))
corr_props.append(_get_kwd(tmp_string, 'PROP0='))
if d0 == 0:
d0 = int(_get_kwd(tmp_string, 'D0='))
else:
if d0 != int(_get_kwd(tmp_string, 'D0=')):
print('Error: Varying number of time values')
if ruinfo == 2:
prop_kappa.append(_get_kwd(tmp_string, 'KAPPA='))
if ruinfo == 4:
if 'stop' in kwargs:
if cnfg_no >= kwargs.get('stop') - 1:
break
cnfg_no += 1
print('\r%s %i' % ('Reading configuration', cnfg_no + 1), end='\r')
if cnfg_no == 0:
no_corrs = len(corr_name)
data = []
for c in range(no_corrs):
data.append([])
corr_no = 0
bdio_close(fbdio)
print('\nCreator: ', creator)
print('Ensemble: ', ensemble_name)
print('Lattice volume: ', volume)
print('Boundary conditions: ', boundary_conditions)
print('Number of random sources: ', d0)
print('Number of corrs: ', len(corr_name))
print('Number of configurations: ', cnfg_no + 1)
corr_kappa = [] # Contains kappa values for both propagators of given correlation function
corr_source = []
for item in corr_props:
corr_kappa.append(float(prop_kappa[int(item)]))
result = {}
for c in range(no_corrs):
result[(corr_name[c], str(corr_kappa[c]))] = Obs([np.asarray(data[c])], [ensemble_name])
# Check that all data entries have the same number of configurations
if len(set([o.N for o in list(result.values())])) != 1:
raise Exception('Error: Not all correlators have the same number of configurations. bdio file is possibly corrupted.')
return result

pyerrors/input/input.py Normal file

@@ -0,0 +1,660 @@
#!/usr/bin/env python
# coding: utf-8
import sys
import os
import fnmatch
import re
import struct
import autograd.numpy as np # Thinly-wrapped numpy
from ..pyerrors import Obs
from ..fits import fit_lin
def read_sfcf(path, prefix, name, **kwargs):
"""Read sfcf C format from given folder structure.
Keyword arguments
-----------------
im -- if True, read imaginary instead of real part of the correlation function.
single -- if True, read a boundary-to-boundary correlation function with a single value
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
"""
if kwargs.get('im'):
im = 1
part = 'imaginary'
else:
im = 0
part = 'real'
if kwargs.get('single'):
b2b = 1
single = 1
else:
b2b = 0
single = 0
if kwargs.get('b2b'):
b2b = 1
read = 0
T = 0
start = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(dirnames)
break
if not ls:
print('Error, directory not found')
sys.exit()
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
print('Read', part, 'part of', name, 'from', prefix, ',', replica, 'replica')
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
new_names = ls
print(replica, 'replica')
for i, item in enumerate(ls):
print(item)
sub_ls = []
for (dirpath, dirnames, filenames) in os.walk(path+'/'+item):
sub_ls.extend(dirnames)
break
for exc in sub_ls:
if not fnmatch.fnmatch(exc, 'cfg*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(x[3:]))
no_cfg = len(sub_ls)
print(no_cfg, 'configurations')
if i == 0:
with open(path + '/' + item + '/' + sub_ls[0] + '/' + name) as fp:
for k, line in enumerate(fp):
if read == 1 and not line.strip() and k > start + 1:
break
if read == 1 and k >= start:
T += 1
if '[correlator]' in line:
read = 1
start = k + 7 + b2b
T -= b2b
deltas = []
for j in range(T):
deltas.append([])
sublength = len(sub_ls)
for j in range(T):
deltas[j].append(np.zeros(sublength))
for cnfg, subitem in enumerate(sub_ls):
with open(path + '/' + item + '/' + subitem + '/'+name) as fp:
for k, line in enumerate(fp):
if start <= k < start + T:
floats = list(map(float, line.split()))
deltas[k-start][i][cnfg] = floats[1 + im - single]
result = []
for t in range(T):
result.append(Obs(deltas[t], new_names))
return result
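# Usage sketch with a hypothetical folder layout
# /data/sfcf/ensA_r0/cfg1/f_A, /data/sfcf/ensA_r1/cfg1/f_A, ...:
#
#   f_A = read_sfcf('/data/sfcf', 'ensA_r', 'f_A', names=['ensA_r0', 'ensA_r1'])
#   for obs in f_A:
#       obs.gamma_method()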
def read_sfcf_c(path, prefix, name, **kwargs):
"""Read sfcf c format from given folder structure.
Keyword arguments
-----------------
im -- if True, read imaginary instead of real part of the correlation function.
single -- if True, read a boundary-to-boundary correlation function with a single value
b2b -- if True, read a time-dependent boundary-to-boundary correlation function
names -- Alternative labeling for replicas/ensembles. Has to have the appropriate length
"""
if kwargs.get('im'):
im = 1
part = 'imaginary'
else:
im = 0
part = 'real'
if kwargs.get('single'):
b2b = 1
single = 1
else:
b2b = 0
single = 0
if kwargs.get('b2b'):
b2b = 1
read = 0
T = 0
start = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(dirnames)
break
if not ls:
print('Error, directory not found')
sys.exit()
# Exclude folders with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix+'*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
replica = len(ls)
if 'names' in kwargs:
new_names = kwargs.get('names')
if len(new_names) != replica:
raise Exception('Names does not have the required length', replica)
else:
new_names = ls
print('Read', part, 'part of', name, 'from', prefix[:-1], ',', replica, 'replica')
for i, item in enumerate(ls):
sub_ls = []
for (dirpath, dirnames, filenames) in os.walk(path+'/'+item):
sub_ls.extend(filenames)
break
for exc in sub_ls:
if not fnmatch.fnmatch(exc, prefix+'*'):
sub_ls = list(set(sub_ls) - set([exc]))
sub_ls.sort(key=lambda x: int(re.findall(r'\d+', x)[-1]))
first_cfg = int(re.findall(r'\d+', sub_ls[0])[-1])
last_cfg = len(sub_ls) + first_cfg - 1
for cfg in range(1, len(sub_ls)):
if int(re.findall(r'\d+', sub_ls[cfg])[-1]) != first_cfg + cfg:
last_cfg = cfg + first_cfg - 1
break
no_cfg = last_cfg - first_cfg + 1
print(item, ':', no_cfg, 'evenly spaced configurations (', first_cfg, '-', last_cfg, ') ,', len(sub_ls) - no_cfg, 'configs omitted\n')
if i == 0:
read = 0
found = 0
with open(path+'/'+item+'/'+sub_ls[0]) as fp:
for k, line in enumerate(fp):
if 'quarks' in kwargs:
if found == 0 and read == 1:
if line.strip() == 'quarks ' + kwargs.get('quarks'):
found = 1
print('found', kwargs.get('quarks'))
else:
read = 0
if read == 1 and not line.strip():
break
if read == 1 and k >= start_read:
T += 1
if line.strip() == 'name '+name:
read = 1
start_read = k + 5 + b2b
print('T =', T, ', starting to read in line', start_read)
#TODO what to do if start_read was not found
if 'quarks' in kwargs:
if found == 0:
raise Exception(kwargs.get('quarks') + ' not found')
deltas = []
for j in range(T):
deltas.append([])
sublength = no_cfg
for j in range(T):
deltas[j].append(np.zeros(sublength))
for cfg in range(no_cfg):
with open(path+'/'+item+'/'+sub_ls[cfg]) as fp:
for k, line in enumerate(fp):
if k == start_read - 5 - b2b:
if line.strip() != 'name ' + name:
raise Exception('Wrong format', sub_ls[cfg])
if start_read <= k < start_read + T:
floats = list(map(float, line.split()))
deltas[k-start_read][i][cfg] = floats[1 + im - single]
result = []
for t in range(T):
result.append(Obs(deltas[t], new_names))
return result
def read_qtop(path, prefix, **kwargs):
"""Read qtop format from given folder structure.
Keyword arguments
-----------------
target -- specifies the topological sector to be reweighted to (default 0)
full -- if true read the charge instead of the reweighting factor.
"""
if 'target' in kwargs:
target = kwargs.get('target')
else:
target = 0
if kwargs.get('full'):
full = 1
else:
full = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
print('Error, directory not found')
sys.exit()
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix+'*'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0])) # New version, to cope with ids, etc.
replica = len(ls)
print('Read Q_top from', prefix[:-1], ',', replica, 'replica')
deltas = []
for rep in range(replica):
tmp = []
with open(path+'/'+ls[rep]) as fp:
for k, line in enumerate(fp):
floats = list(map(float, line.split()))
if full == 1:
tmp.append(floats[1])
else:
if int(floats[1]) == target:
tmp.append(1.0)
else:
tmp.append(0.0)
deltas.append(np.array(tmp))
result = Obs(deltas, [(w.split('.'))[0] for w in ls])
return result
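# Sketch (hypothetical path and prefix): reweighting factor to the Q=0
# sector, or the full topological charge history:
#
#   w_Q0 = read_qtop('/data/qtop', 'ensA_qtop_', target=0)
#   q_hist = read_qtop('/data/qtop', 'ensA_qtop_', full=True)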
def read_rwms(path, prefix, **kwargs):
"""Read rwms format from given folder structure. Returns a list of length nrw
Keyword arguments
-----------------
new_format -- if True, the array of the associated numbers of Hasenbusch factors is extracted (v>=openQCD1.6)
r_start -- list which contains the first config to be read for each replicum
r_stop -- list which contains the last config to be read for each replicum
"""
if kwargs.get('new_format'):
extract_nfct = 1
else:
extract_nfct = 0
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
print('Error, directory not found')
sys.exit()
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
if 'r_start' in kwargs:
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
else:
r_start = [None] * replica
if 'r_stop' in kwargs:
r_stop = kwargs.get('r_stop')
if len(r_stop) != replica:
raise Exception('r_stop does not match number of replicas')
else:
r_stop = [None] * replica
print('Read reweighting factors from', prefix[:-1], ',', replica, 'replica', end='')
print_err = 0
if 'print_err' in kwargs:
print_err = 1
print()
deltas = []
for rep in range(replica):
tmp_array = []
with open(path+ '/' + ls[rep], 'rb') as fp:
#header
t = fp.read(4) # number of reweighting factors
if rep == 0:
nrw = struct.unpack('i', t)[0]
for k in range(nrw):
deltas.append([])
else:
if nrw != struct.unpack('i', t)[0]:
print('Error: different number of reweighting factors for replicum', rep)
sys.exit()
for k in range(nrw):
tmp_array.append([])
# This block is necessary for openQCD1.6 ms1 files
nfct = []
if extract_nfct == 1:
for i in range(nrw):
t = fp.read(4)
nfct.append(struct.unpack('i', t)[0])
print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
else:
for i in range(nrw):
nfct.append(1)
nsrc = []
for i in range(nrw):
t = fp.read(4)
nsrc.append(struct.unpack('i', t)[0])
#body
while True:
t = fp.read(4)
if len(t) < 4:
break
if print_err:
config_no = struct.unpack('i', t)
for i in range(nrw):
tmp_nfct = 1.0
for j in range(nfct[i]):
t = fp.read(8 * nsrc[i])
t = fp.read(8 * nsrc[i])
tmp_rw = struct.unpack('d' * nsrc[i], t)
tmp_nfct *= np.mean(np.exp(-np.asarray(tmp_rw)))
if print_err:
print(config_no, i, j, np.mean(np.exp(-np.asarray(tmp_rw))), np.std(np.exp(-np.asarray(tmp_rw))))
print('Sources:', np.exp(-np.asarray(tmp_rw)))
print('Partial factor:', tmp_nfct)
tmp_array[i].append(tmp_nfct)
for k in range(nrw):
deltas[k].append(tmp_array[k][r_start[rep]:r_stop[rep]])
print(',', nrw, 'reweighting factors with', nsrc, 'sources')
result = []
for t in range(nrw):
result.append(Obs(deltas[t], [(w.split('.'))[0] for w in ls]))
return result
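# Sketch (hypothetical path and prefix): read openQCD 1.6 ms1 reweighting
# factors, reading each of two replica from configuration 100 on; the result
# is a list with one Obs per reweighting factor:
#
#   rw = read_rwms('/data/ms1', 'ensA_ms1_', new_format=True,
#                  r_start=[100, 100], r_stop=[None, None])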
def read_pbp(path, prefix, **kwargs):
"""Read pbp format from given folder structure. Returns a list of length nrw
Keyword arguments
-----------------
r_start -- list which contains the first config to be read for each replicum
r_stop -- list which contains the last config to be read for each replicum
"""
extract_nfct = 1
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
print('Error, directory not found')
sys.exit()
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
if 'r_start' in kwargs:
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
else:
r_start = [None] * replica
if 'r_stop' in kwargs:
r_stop = kwargs.get('r_stop')
if len(r_stop) != replica:
raise Exception('r_stop does not match number of replicas')
else:
r_stop = [None] * replica
print(r'Read <bar{psi}\psi> from', prefix[:-1], ',', replica, 'replica', end='')
print_err = 0
if 'print_err' in kwargs:
print_err = 1
print()
deltas = []
for rep in range(replica):
tmp_array = []
with open(path+ '/' + ls[rep], 'rb') as fp:
#header
t = fp.read(4) # number of reweighting factors
if rep == 0:
nrw = struct.unpack('i', t)[0]
for k in range(nrw):
deltas.append([])
else:
if nrw != struct.unpack('i', t)[0]:
print('Error: different number of reweighting factors for replicum', rep)
sys.exit()
for k in range(nrw):
tmp_array.append([])
# This block is necessary for openQCD1.6 ms1 files
nfct = []
if extract_nfct == 1:
for i in range(nrw):
t = fp.read(4)
nfct.append(struct.unpack('i', t)[0])
print('nfct: ', nfct) # Hasenbusch factor, 1 for rat reweighting
else:
for i in range(nrw):
nfct.append(1)
nsrc = []
for i in range(nrw):
t = fp.read(4)
nsrc.append(struct.unpack('i', t)[0])
#body
while True:
t = fp.read(4)
if len(t) < 4:
break
if print_err:
config_no = struct.unpack('i', t)
for i in range(nrw):
tmp_nfct = 1.0
for j in range(nfct[i]):
t = fp.read(8 * nsrc[i])
t = fp.read(8 * nsrc[i])
tmp_rw = struct.unpack('d' * nsrc[i], t)
tmp_nfct *= np.mean(np.asarray(tmp_rw))
if print_err:
print(config_no, i, j, np.mean(np.asarray(tmp_rw)), np.std(np.asarray(tmp_rw)))
print('Sources:', np.asarray(tmp_rw))
print('Partial factor:', tmp_nfct)
tmp_array[i].append(tmp_nfct)
for k in range(nrw):
deltas[k].append(tmp_array[k][r_start[rep]:r_stop[rep]])
print(',', nrw, r'<bar{psi}\psi> with', nsrc, 'sources')
result = []
for t in range(nrw):
result.append(Obs(deltas[t], [(w.split('.'))[0] for w in ls]))
return result
def extract_t0(path, prefix, dtr_read, xmin, spatial_extent, fit_range=5, **kwargs):
"""Extract t0 from given .ms.dat files. Returns t0 as Obs.
It is assumed that all boundary effects have sufficiently decayed at x0=xmin.
The data around the zero crossing of t^2<E> - 0.3 is fitted with a linear function
from which the exact root is extracted.
Only works with openQCD v 1.2.
Parameters
----------
path -- Path to .ms.dat files
prefix -- Ensemble prefix
dtr_read -- Determines how many trajectories should be skipped when reading the ms.dat files.
Corresponds to dtr_cnfg / dtr_ms in the openQCD input file.
xmin -- First timeslice where the boundary effects have sufficiently decayed.
spatial_extent -- spatial extent of the lattice, required for normalization.
fit_range -- Number of data points left and right of the zero crossing to be included in the linear fit. (Default: 5)
Keyword arguments
-----------------
r_start -- list which contains the first config to be read for each replicum.
r_stop -- list which contains the last config to be read for each replicum.
plaquette -- If true extract the plaquette estimate of t0 instead.
"""
ls = []
for (dirpath, dirnames, filenames) in os.walk(path):
ls.extend(filenames)
break
if not ls:
print('Error, directory not found')
sys.exit()
# Exclude files with different names
for exc in ls:
if not fnmatch.fnmatch(exc, prefix + '*.ms.dat'):
ls = list(set(ls) - set([exc]))
if len(ls) > 1:
ls.sort(key=lambda x: int(re.findall(r'\d+', x[len(prefix):])[0]))
replica = len(ls)
if 'r_start' in kwargs:
r_start = kwargs.get('r_start')
if len(r_start) != replica:
raise Exception('r_start does not match number of replicas')
# Adjust Configuration numbering to python index
r_start = [o - 1 if o else None for o in r_start]
else:
r_start = [None] * replica
if 'r_stop' in kwargs:
r_stop = kwargs.get('r_stop')
if len(r_stop) != replica:
raise Exception('r_stop does not match number of replicas')
else:
r_stop = [None] * replica
print('Extract t0 from', prefix, ',', replica, 'replica')
Ysum = []
for rep in range(replica):
with open(path + '/' + ls[rep], 'rb') as fp:
# Read header
t = fp.read(12)
header = struct.unpack('iii', t)
if rep == 0:
dn = header[0]
nn = header[1]
tmax = header[2]
elif dn != header[0] or nn != header[1] or tmax != header[2]:
raise Exception('Replica parameters do not match.')
t = fp.read(8)
if rep == 0:
eps = struct.unpack('d', t)[0]
print('Step size:', eps, ', Maximal t value:', dn * nn * eps)
elif eps != struct.unpack('d', t)[0]:
raise Exception('Values for eps do not match among replica.')
Ysl = []
# Read body
while True:
t = fp.read(4)
if len(t) < 4:
break
nc = struct.unpack('i', t)[0]
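# Per configuration three arrays of tmax * (nn + 1) doubles follow; the first
# is used for the plaquette estimate of t0, the second otherwise, and the
# third (presumably the topological charge) is skipped.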
t = fp.read(8 * tmax * (nn + 1))
if kwargs.get('plaquette'):
if nc % dtr_read == 0:
Ysl.append(struct.unpack('d' * tmax * (nn + 1), t))
t = fp.read(8 * tmax * (nn + 1))
if not kwargs.get('plaquette'):
if nc % dtr_read == 0:
Ysl.append(struct.unpack('d' * tmax * (nn + 1), t))
t = fp.read(8 * tmax * (nn + 1))
Ysum.append([])
for i, item in enumerate(Ysl):
Ysum[-1].append([np.mean(item[current + xmin:current + tmax - xmin]) for current in range(0, len(item), tmax)])
t2E_dict = {}
for n in range(nn + 1):
samples = []
for nrep, rep in enumerate(Ysum):
samples.append([])
for cnfg in rep:
samples[-1].append(cnfg[n])
samples[-1] = samples[-1][r_start[nrep]:r_stop[nrep]]
new_obs = Obs(samples, [(w.split('.'))[0] for w in ls])
t2E_dict[n * dn * eps] = (n * dn * eps) ** 2 * new_obs / (spatial_extent ** 3) - 0.3
zero_crossing = np.argmax(np.array([o.value for o in t2E_dict.values()]) > 0.0)
x = list(t2E_dict.keys())[zero_crossing - fit_range: zero_crossing + fit_range]
y = list(t2E_dict.values())[zero_crossing - fit_range: zero_crossing + fit_range]
[o.gamma_method() for o in y]
fit_result = fit_lin(x, y)
return -fit_result[0] / fit_result[1]
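# Minimal usage sketch; path, prefix and the reader parameters below are
# hypothetical placeholders, not values from this repository.
#
# t0 = extract_t0('./ms_data', 'ensembleA', dtr_read=4, xmin=5, spatial_extent=32)
# t0.gamma_method()
# print(t0)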

160
pyerrors/jackknifing.py Normal file

@ -0,0 +1,160 @@
#!/usr/bin/env python
# coding: utf-8
import pickle
import matplotlib.pyplot as plt
import numpy as np
def _jack_error(jack):
n = jack.size
mean = np.mean(jack)
error = 0
for i in range(n):
error += (jack[i] - mean) ** 2
return np.sqrt((n - 1) / n * error)
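# Illustrative check (not part of the library): for i.i.d. samples the
# jackknife error reproduces the standard error of the mean.
#
# rng = np.random.default_rng(0)
# data = rng.normal(size=500)
# jacks = (data.sum() - data) / (data.size - 1)  # leave-one-out means
# print(_jack_error(jacks), data.std(ddof=1) / np.sqrt(data.size))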
class Jack:
def __init__(self, value, jacks):
self.jacks = jacks
self.N = list(map(np.size, self.jacks))
self.max_binsize = len(self.N)
self.value = value  # list(map(np.mean, self.jacks))
self.dvalue = list(map(_jack_error, self.jacks))
def print(self, **kwargs):
"""Print basic properties of the Jack."""
if 'binsize' in kwargs:
b = kwargs.get('binsize') - 1
if b == -1:
b = 0
if not isinstance(b, int):
raise TypeError('binsize has to be integer')
if b + 1 > self.max_binsize:
raise Exception('Chosen binsize not calculated')
else:
b = 0
print('Result:\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self.dvalue[b], self.dvalue[b] * np.sqrt(2 * b / self.N[0]), np.abs(self.dvalue[b] / self.value * 100)))
def plot_tauint(self):
plt.xlabel('binsize')
plt.ylabel('tauint')
length = self.max_binsize
x = np.arange(length) + 1
dvalue = np.asarray(self.dvalue)
ratio_sq = (dvalue / dvalue[0]) ** 2  # squared ratio of binned to unbinned error
plt.errorbar(x, ratio_sq / 2, yerr=np.sqrt((ratio_sq * np.sqrt(2 * x / self.N[0])) ** 2
+ (ratio_sq * np.sqrt(2 / self.N[0])) ** 2), linewidth=1, capsize=2)
plt.xlim(0.5, length + 0.5)
plt.title('Tauint')
plt.show()
def plot_history(self):
"""Plot the Monte Carlo history reconstructed from the unbinned jackknife samples.
The Jack does not store the raw data, but inverting the leave-one-out relation
jack_i = (n * mean - x_i) / (n - 1) recovers it, since the mean of the
jackknife samples equals the mean of the original data."""
n = self.N[0]
mean = np.mean(self.jacks[0])
y = n * mean - (n - 1) * np.asarray(self.jacks[0])
x = np.arange(n)
plt.errorbar(x, y, fmt='.', markersize=3)
plt.xlim(-0.5, n - 0.5)
plt.show()
def dump(self, name, **kwargs):
"""Dump the Jack to a pickle file 'name'.
Keyword arguments:
path -- specifies a custom path for the file (default '.')
"""
if 'path' in kwargs:
file_name = kwargs.get('path') + '/' + name + '.p'
else:
file_name = name + '.p'
with open(file_name, 'wb') as fb:
pickle.dump(self, fb)
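# Hypothetical round trip for the dump above (names are placeholders):
#
# my_jack.dump('my_observable', path='./results')
# with open('./results/my_observable.p', 'rb') as fp:
#     my_jack = pickle.load(fp)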
def generate_jack(obs, **kwargs):
full_data = []
for r, name in enumerate(obs.names):
if r == 0:
full_data = obs.deltas[name] + obs.r_values[name]
else:
full_data = np.append(full_data, obs.deltas[name] + obs.r_values[name])
jacks = []
if 'max_binsize' in kwargs:
max_b = kwargs.get('max_binsize')
if not isinstance(max_b, int):
raise TypeError('max_binsize has to be integer')
else:
max_b = 1
for b in range(max_b):
# binning if necessary
if b > 0:
n = full_data.size // (b + 1)
binned_data = np.zeros(n)
for i in range(n):
for j in range(b + 1):
binned_data[i] += full_data[i * (b + 1) + j]
binned_data[i] /= (b + 1)
else:
binned_data = full_data
n = binned_data.size
# generate jacks from data
mean = np.mean(binned_data)
tmp_jacks = np.zeros(n)
for i in range(n):
tmp_jacks[i] = (n * mean - binned_data[i]) / (n - 1)
jacks.append(tmp_jacks)
# The mean of the binned data does not exactly reproduce obs.value, so the original value is passed on
return Jack(obs.value, jacks)
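# Sketch of the intended workflow (my_obs is a hypothetical Obs):
#
# my_jack = generate_jack(my_obs, max_binsize=10)
# my_jack.print(binsize=5)  # error estimate from bins of five configurations
# my_jack.plot_tauint()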
def derived_jack(func, data, **kwargs):
"""Construct a derived Jack according to func(data, **kwargs).
Parameters
----------
func -- arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work,
all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as np').
data -- list of Jacks, e.g. [jack1, jack2, jack3].
Notes
-----
For simple mathematical operations it can be practical to use anonymous functions.
For the ratio of two jacks one can e.g. use
new_jack = derived_jack(lambda x : x[0] / x[1], [jack1, jack2])
"""
# Check shapes of data
if not all(x.N == data[0].N for x in data):
raise Exception('Error: Shape of data does not fit')
values = np.zeros(len(data))
for j, item in enumerate(data):
values[j] = item.value
new_value = func(values, **kwargs)
jacks = []
for b in range(data[0].max_binsize):
tmp_jacks = np.zeros(data[0].N[b])
for i in range(data[0].N[b]):
values = np.zeros(len(data))
for j, item in enumerate(data):
values[j] = item.jacks[b][i]
tmp_jacks[i] = func(values, **kwargs)
jacks.append(tmp_jacks)
return Jack(new_value, jacks)

347
pyerrors/linalg.py Normal file

@ -0,0 +1,347 @@
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import autograd.numpy as anp # Thinly-wrapped numpy
from .pyerrors import derived_observable
### This code block is directly taken from the current master branch of autograd and remains
# only until the new version is released on PyPI
from functools import partial
from autograd.extend import defvjp
_dot = partial(anp.einsum, '...ij,...jk->...ik')
# batched diag
_diag = lambda a: anp.eye(a.shape[-1])*a
# batched diagonal, similar to matrix_diag in tensorflow
def _matrix_diag(a):
reps = anp.array(a.shape)
reps[:-1] = 1
reps[-1] = a.shape[-1]
newshape = list(a.shape) + [a.shape[-1]]
return _diag(anp.tile(a, reps).reshape(newshape))
# https://arxiv.org/pdf/1701.00392.pdf Eq(4.77)
# Note the formula from Sec3.1 in https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf is incomplete
def grad_eig(ans, x):
"""Gradient of a general square (complex valued) matrix"""
e, u = ans # eigenvalues as 1d array, eigenvectors in columns
n = e.shape[-1]
def vjp(g):
ge, gu = g
ge = _matrix_diag(ge)
f = 1/(e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
f -= _diag(f)
ut = anp.swapaxes(u, -1, -2)
r1 = f * _dot(ut, gu)
r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
if not anp.iscomplexobj(x):
r = anp.real(r)
# the derivative is still complex for real input (imaginary delta is allowed), real output
# but the derivative should be real in real input case when imaginary delta is forbidden
return r
return vjp
defvjp(anp.linalg.eig, grad_eig)
### End of the code block from autograd.master
def scalar_mat_op(op, obs, **kwargs):
"""Computes the matrix to scalar operation op to a given matrix of Obs."""
def _mat(x, **kwargs):
dim = int(np.sqrt(len(x)))
if np.sqrt(len(x)) != dim:
raise Exception('Input has to have dim**2 entries')
mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(x[j + dim * i])
mat.append(row)
return op(anp.array(mat))
if isinstance(obs, np.ndarray):
raveled_obs = (1 * (obs.ravel())).tolist()
elif isinstance(obs, list):
raveled_obs = obs
else:
raise TypeError('Improper type of input.')
return derived_observable(_mat, raveled_obs, **kwargs)
def mat_mat_op(op, obs, **kwargs):
"""Computes the matrix to matrix operation op to a given matrix of Obs."""
if kwargs.get('num_grad') is True:
return _num_diff_mat_mat_op(op, obs, **kwargs)
return derived_observable(lambda x, **kwargs: op(x), obs)
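# Example (obs_matrix is a hypothetical square np.ndarray of Obs): the matrix
# inverse with automatic differentiation through the operation.
#
# inv = mat_mat_op(anp.linalg.inv, obs_matrix)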
def eigh(obs, **kwargs):
"""Computes the eigenvalues and eigenvectors of a given hermitian matrix of Obs according to np.linalg.eigh."""
if kwargs.get('num_grad') is True:
return _num_diff_eigh(obs, **kwargs)
w = derived_observable(lambda x, **kwargs: anp.linalg.eigh(x)[0], obs)
v = derived_observable(lambda x, **kwargs: anp.linalg.eigh(x)[1], obs)
return w, v
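# Usage sketch (obs_matrix as above): w holds the eigenvalues as Obs,
# v the eigenvectors as a matrix of Obs.
#
# w, v = eigh(obs_matrix)
# [o.gamma_method() for o in w]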
def eig(obs, **kwargs):
"""Computes the eigenvalues of a given matrix of Obs according to np.linalg.eig."""
if kwargs.get('num_grad') is True:
return _num_diff_eig(obs, **kwargs)
# Note: Automatic differentiation of eig is implemented in the git of autograd
# but not yet released to PyPI (1.3)
w = derived_observable(lambda x, **kwargs: anp.real(anp.linalg.eig(x)[0]), obs)
return w
def pinv(obs, **kwargs):
"""Computes the Moore-Penrose pseudoinverse of a matrix of Obs."""
if kwargs.get('num_grad') is True:
return _num_diff_pinv(obs, **kwargs)
return derived_observable(lambda x, **kwargs: anp.linalg.pinv(x), obs)
def svd(obs, **kwargs):
"""Computes the singular value decomposition of a matrix of Obs."""
if kwargs.get('num_grad') is True:
return _num_diff_svd(obs, **kwargs)
u = derived_observable(lambda x, **kwargs: anp.linalg.svd(x, full_matrices=False)[0], obs)
s = derived_observable(lambda x, **kwargs: anp.linalg.svd(x, full_matrices=False)[1], obs)
vh = derived_observable(lambda x, **kwargs: anp.linalg.svd(x, full_matrices=False)[2], obs)
return (u, s, vh)
def slog_det(obs, **kwargs):
"""Computes the determinant of a matrix of Obs via np.linalg.slogdet."""
def _mat(x):
dim = int(np.sqrt(len(x)))
if np.sqrt(len(x)) != dim:
raise Exception('Input has to have dim**2 entries')
mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(x[j + dim * i])
mat.append(row)
(sign, logdet) = anp.linalg.slogdet(np.array(mat))
return sign * anp.exp(logdet)
if isinstance(obs, np.ndarray):
return derived_observable(_mat, (1 * (obs.ravel())).tolist(), **kwargs)
elif isinstance(obs, list):
return derived_observable(_mat, obs, **kwargs)
else:
raise TypeError('Improper type of input.')
# Variants for numerical differentiation
def _num_diff_mat_mat_op(op, obs, **kwargs):
"""Computes the matrix to matrix operation op to a given matrix of Obs elementwise
which is suitable for numerical differentiation."""
def _mat(x, **kwargs):
dim = int(np.sqrt(len(x)))
if np.sqrt(len(x)) != dim:
raise Exception('Input has to have dim**2 entries')
mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(x[j + dim * i])
mat.append(row)
return op(np.array(mat))[kwargs.get('i')][kwargs.get('j')]
if isinstance(obs, np.ndarray):
raveled_obs = (1 * (obs.ravel())).tolist()
elif isinstance(obs, list):
raveled_obs = obs
else:
raise TypeError('Improper type of input.')
dim = int(np.sqrt(len(raveled_obs)))
res_mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(derived_observable(_mat, raveled_obs, i=i, j=j, **kwargs))
res_mat.append(row)
return np.array(res_mat) @ np.identity(dim)
def _num_diff_eigh(obs, **kwargs):
"""Computes the eigenvalues and eigenvectors of a given hermitian matrix of Obs according to np.linalg.eigh
elementwise which is suitable for numerical differentiation."""
def _mat(x, **kwargs):
dim = int(np.sqrt(len(x)))
if np.sqrt(len(x)) != dim:
raise Exception('Input has to have dim**2 entries')
mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(x[j + dim * i])
mat.append(row)
n = kwargs.get('n')
res = np.linalg.eigh(np.array(mat))[n]
if n == 0:
return res[kwargs.get('i')]
else:
return res[kwargs.get('i')][kwargs.get('j')]
if isinstance(obs, np.ndarray):
raveled_obs = (1 * (obs.ravel())).tolist()
elif isinstance(obs, list):
raveled_obs = obs
else:
raise TypeError('Improper type of input.')
dim = int(np.sqrt(len(raveled_obs)))
res_vec = []
for i in range(dim):
res_vec.append(derived_observable(_mat, raveled_obs, n=0, i=i, **kwargs))
res_mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(derived_observable(_mat, raveled_obs, n=1, i=i, j=j, **kwargs))
res_mat.append(row)
return (np.array(res_vec) @ np.identity(dim), np.array(res_mat) @ np.identity(dim))
def _num_diff_eig(obs, **kwargs):
"""Computes the eigenvalues of a given matrix of Obs according to np.linalg.eig
elementwise which is suitable for numerical differentiation."""
def _mat(x, **kwargs):
dim = int(np.sqrt(len(x)))
if np.sqrt(len(x)) != dim:
raise Exception('Input has to have dim**2 entries')
mat = []
for i in range(dim):
row = []
for j in range(dim):
row.append(x[j + dim * i])
mat.append(row)
n = kwargs.get('n')
res = np.linalg.eig(np.array(mat))[n]
if n == 0:
# Discard imaginary part of eigenvalue here
return np.real(res[kwargs.get('i')])
else:
return res[kwargs.get('i')][kwargs.get('j')]
if isinstance(obs, np.ndarray):
raveled_obs = (1 * (obs.ravel())).tolist()
elif isinstance(obs, list):
raveled_obs = obs
else:
raise TypeError('Improper type of input.')
dim = int(np.sqrt(len(raveled_obs)))
res_vec = []
for i in range(dim):
# Note: Automatic differentiation of eig is implemented in the git of autograd
# but not yet released to PyPI (1.3)
res_vec.append(derived_observable(_mat, raveled_obs, n=0, i=i, **kwargs))
return np.array(res_vec) @ np.identity(dim)
def _num_diff_pinv(obs, **kwargs):
"""Computes the Moore-Penrose pseudoinverse of a matrix of Obs elementwise which is suitable
for numerical differentiation."""
def _mat(x, **kwargs):
shape = kwargs.get('shape')
mat = []
for i in range(shape[0]):
row = []
for j in range(shape[1]):
row.append(x[j + shape[1] * i])
mat.append(row)
return np.linalg.pinv(np.array(mat))[kwargs.get('i')][kwargs.get('j')]
if isinstance(obs, np.ndarray):
shape = obs.shape
raveled_obs = (1 * (obs.ravel())).tolist()
else:
raise TypeError('Improper type of input.')
res_mat = []
for i in range(shape[1]):
row = []
for j in range(shape[0]):
row.append(derived_observable(_mat, raveled_obs, shape=shape, i=i, j=j, **kwargs))
res_mat.append(row)
return np.array(res_mat) @ np.identity(shape[0])
def _num_diff_svd(obs, **kwargs):
"""Computes the singular value decomposition of a matrix of Obs elementwise which
is suitable for numerical differentiation."""
def _mat(x, **kwargs):
shape = kwargs.get('shape')
mat = []
for i in range(shape[0]):
row = []
for j in range(shape[1]):
row.append(x[j + shape[1] * i])
mat.append(row)
res = np.linalg.svd(np.array(mat), full_matrices=False)
if kwargs.get('n') == 1:
return res[1][kwargs.get('i')]
else:
return res[kwargs.get('n')][kwargs.get('i')][kwargs.get('j')]
if isinstance(obs, np.ndarray):
shape = obs.shape
raveled_obs = (1 * (obs.ravel())).tolist()
else:
raise TypeError('Improper type of input.')
mid_index = min(shape[0], shape[1])
res_mat0 = []
for i in range(shape[0]):
row = []
for j in range(mid_index):
row.append(derived_observable(_mat, raveled_obs, shape=shape, n=0, i=i, j=j, **kwargs))
res_mat0.append(row)
res_mat1 = []
for i in range(mid_index):
res_mat1.append(derived_observable(_mat, raveled_obs, shape=shape, n=1, i=i, **kwargs))
res_mat2 = []
for i in range(mid_index):
row = []
for j in range(shape[1]):
row.append(derived_observable(_mat, raveled_obs, shape=shape, n=2, i=i, j=j, **kwargs))
res_mat2.append(row)
return (np.array(res_mat0) @ np.identity(mid_index), np.array(res_mat1) @ np.identity(mid_index), np.array(res_mat2) @ np.identity(shape[1]))

84
pyerrors/misc.py Normal file

@ -0,0 +1,84 @@
#!/usr/bin/env python
# coding: utf-8
import gc
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
from .pyerrors import Obs
def gen_correlated_data(means, cov, name, tau=0.5, samples=1000):
""" Generate observables with given covariance and autocorrelation times.
Arguments
-----------------
means -- list containing the mean value of each observable.
cov -- covariance matrix for the data to be generated.
name -- ensemble name for the data to be generated.
tau -- can either be a real number or a list with an entry for
every dataset.
samples -- number of samples to be generated for each observable.
"""
assert len(means) == cov.shape[-1]
tau = np.asarray(tau)
if np.min(tau) < 0.5:
raise Exception('All integrated autocorrelations have to be >= 0.5.')
a = (2 * tau - 1) / (2 * tau + 1)
rand = np.random.multivariate_normal(np.zeros_like(means), cov * samples, samples)
# Normalize samples such that sample variance matches input
norm = np.array([np.var(o, ddof=1) / samples for o in rand.T])
rand = rand @ np.diag(np.sqrt(np.diag(cov))) @ np.diag(1 / np.sqrt(norm))
data = [rand[0]]
for i in range(1, samples):
data.append(np.sqrt(1 - a ** 2) * rand[i] + a * data[-1])
corr_data = np.array(data) - np.mean(data, axis=0) + means
return [Obs([dat], [name]) for dat in corr_data.T]
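# Illustrative call with made-up numbers: three observables with unit variance,
# the first two correlated with coefficient 0.8, common tau = 2.
#
# cov = np.array([[1.0, 0.8, 0.0], [0.8, 1.0, 0.0], [0.0, 0.0, 1.0]])
# obs_list = gen_correlated_data([1.0, 2.0, 3.0], cov, 'synthetic_ensemble', tau=2.0)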
def ks_test(obs=None):
"""Performs a KolmogorovSmirnov test for the Q-values of a list of Obs.
If no list is given all Obs in memory are used.
Disclaimer: The determination of the individual Q-values as well as this function have not been tested yet.
"""
if obs is None:
obs_list = []
for obj in gc.get_objects():
if isinstance(obj, Obs):
obs_list.append(obj)
else:
obs_list = obs
Qs = []
for obs_i in obs_list:
for ens in obs_i.e_names:
if obs_i.e_Q[ens] is not None:
Qs.append(obs_i.e_Q[ens])
bins = len(Qs)
x = np.arange(0, 1.001, 0.001)
plt.plot(x, x, 'k', zorder=1)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.xlabel('Q value')
plt.ylabel('Cumulative probability')
plt.title(str(bins) + ' Q values')
n = np.arange(1, bins + 1) / float(bins)
Xs = np.sort(Qs)
plt.step(Xs, n)
diffs = n - Xs
loc_max_diff = np.argmax(np.abs(diffs))
loc = Xs[loc_max_diff]
plt.annotate(s='', xy=(loc, loc), xytext=(loc, loc + diffs[loc_max_diff]), arrowprops=dict(arrowstyle='<->', shrinkA=0, shrinkB=0))
plt.show()
print(scipy.stats.kstest(Qs, 'uniform'))
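# Usage sketch (obs_list is a hypothetical list of fitted Obs):
#
# ks_test()          # scan memory for all Obs
# ks_test(obs_list)  # or restrict the test to an explicit list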

112
pyerrors/mpm.py Normal file

@ -0,0 +1,112 @@
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import scipy.linalg
from .pyerrors import Obs
from .linalg import svd, eig, pinv
def matrix_pencil_method(corrs, k=1, p=None, **kwargs):
""" Matrix pencil method to extract k energy levels from data
Implementation of the matrix pencil method based on
eq. (2.17) of Y. Hua, T. K. Sarkar, IEEE Trans. Acoust. 38, 814-824 (1990)
Parameters
----------
corrs -- list of Obs for the analysis of a single correlator, or a list of lists
of Obs if several correlators are to be analyzed at once.
k -- Number of states to extract (default 1).
p -- matrix pencil parameter which filters noise. The optimal value is expected between
len(data)/3 and 2*len(data)/3. The computation is more expensive the closer p is
to len(data)/2 but could possibly suppress more noise (default len(data)//2).
"""
if isinstance(corrs[0], Obs):
data = [corrs]
else:
data = corrs
lengths = [len(d) for d in data]
if lengths.count(lengths[0]) != len(lengths):
raise Exception('All datasets have to have the same length.')
data_sets = len(data)
n_data = len(data[0])
if p is None:
p = max(n_data // 2, k)
if n_data <= p:
raise Exception('The pencil p has to be smaller than the number of data samples.')
if p < k or n_data - p < k:
raise Exception('Cannot extract', k, 'energy levels with p=', p, 'and N-p=', n_data - p)
# Construct the Hankel matrices
matrix = []
for n in range(data_sets):
matrix.append(scipy.linalg.hankel(data[n][:n_data-p], data[n][n_data-p-1:]))
matrix = np.array(matrix)
# Construct y1 and y2
y1 = np.concatenate(matrix[:, :, :p])
y2 = np.concatenate(matrix[:, :, 1:])
# Apply SVD to y2
u, s, vh = svd(y2, **kwargs)
# Construct z from y1 and SVD of y2, setting all singular values beyond the kth to zero
z = np.diag(1. / s[:k]) @ u[:, :k].T @ y1 @ vh.T[:, :k]
# Return the sorted logarithms of the real eigenvalues as Obs
energy_levels = np.log(np.abs(eig(z, **kwargs)))
return sorted(energy_levels, key=lambda x: abs(x.value))
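# Sketch: extract the two lowest energy levels from a single correlator
# (corr is a hypothetical list of Obs, one entry per timeslice).
#
# energies = matrix_pencil_method(corr, k=2)
# for E in energies:
#     E.gamma_method()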
def matrix_pencil_method_old(data, p, noise_level=None, verbose=1, **kwargs):
""" Older impleentation of the matrix pencil method with pencil p on given data to
extract energy levels.
Parameters
----------
data -- lists of Obs, where the nth entry is considered to be the correlation function
at x0=n+offset.
p -- matrix pencil parameter which corresponds to the number of energy levels to extract.
higher values for p can help decreasing noise.
noise_level -- If this argument is not None an additional prefiltering via singular
value decomposition is performed in which all singular values below 10^(-noise_level)
times the largest singular value are discarded. This increases the computation time.
verbose -- if larger than zero details about the noise filtering are printed to stdout
(default 1)
"""
n_data = len(data)
if n_data <= p:
raise Exception('The pencil p has to be smaller than the number of data samples.')
matrix = scipy.linalg.hankel(data[:n_data-p], data[n_data-p-1:]) @ np.identity(p + 1)
if noise_level is not None:
u, s, vh = svd(matrix)
s_values = np.vectorize(lambda x: x.value)(s)
if verbose > 0:
print('Singular values: ', s_values)
digit = np.argwhere(s_values / s_values[0] < 10.0**(-noise_level))
if digit.size == 0:
digit = len(s_values)
else:
digit = int(digit[0])
if verbose > 0:
print('Consider only', digit, 'out of', len(s), 'singular values')
new_matrix = u[:, :digit] * s[:digit] @ vh[:digit, :]
y1 = new_matrix[:, :-1]
y2 = new_matrix[:, 1:]
else:
y1 = matrix[:, :-1]
y2 = matrix[:, 1:]
# Moore-Penrose pseudoinverse
pinv_y1 = pinv(y1)
# Note: Automatic differentiation of eig is implemented in the git of autograd
# but not yet released to PyPI (1.3). The code is currently part of pyerrors
e = eig((pinv_y1 @ y2), **kwargs)
energy_levels = -np.log(np.abs(e))
return sorted(energy_levels, key=lambda x: abs(x.value))

1222
pyerrors/pyerrors.py Normal file

File diff suppressed because it is too large