added basic functionality for covobs

This commit is contained in:
Simon Kuberski 2021-11-29 12:15:27 +01:00
parent 1326e9c863
commit 30ba138558
2 changed files with 315 additions and 150 deletions

54
pyerrors/covobs.py Normal file
View file

@ -0,0 +1,54 @@
import numpy as np
class Covobs:
    """Quantity whose uncertainty is given by a covariance matrix.

    The object stores the covariance matrix, a gradient with respect to the
    means that belong to that matrix, and the mean value itself. The
    variance is obtained as ``grad^T . cov . grad``.
    """

    def __init__(self, mean, cov, name, pos=None, grad=None):
        """ Initialize Covobs object.

        Parameters
        ----------
        mean : float
            Mean value of the new Obs
        cov : list or array
            2d Covariance matrix or 1d diagonal entries. A scalar is
            accepted as well and treated as a single variance.
        name : str
            identifier for the covariance matrix
        pos : int
            Position of the variance belonging to mean in cov.
            Is taken to be 0 if cov has only a single entry.
            Ignored when grad is given.
        grad : list or array
            Gradient of the Covobs wrt. the means belonging to cov.
            If omitted, a unit vector pointing at pos is used.

        Raises
        ------
        Exception
            If cov is not a scalar, a 1d array or a square 2d array, if pos
            is required but missing, or if pos does not address an entry
            of cov.
        """
        self.cov = np.array(cov)
        if self.cov.ndim == 0:
            self.N = 1
        elif self.cov.ndim == 1:
            # 1d input holds the diagonal; expand it to a full matrix.
            self.N = len(self.cov)
            self.cov = np.diag(self.cov)
        elif self.cov.ndim == 2:
            self.N = self.cov.shape[0]
            if self.cov.shape[1] != self.N:
                raise Exception('Covariance matrix has to be a square matrix!')
        else:
            raise Exception('Covariance matrix has to be a 2 dimensional square matrix!')

        self.name = name

        if grad is None:
            if pos is None:
                if self.N == 1:
                    pos = 0
                else:
                    raise Exception('Have to specify position of cov-element belonging to mean!')
            elif pos >= self.N:
                # Valid indices are 0 .. N-1, so pos == N is already out of range.
                raise Exception('pos %d too large for covariance matrix with dimension %dx%d!' % (pos, self.N, self.N))
            # Unit column vector selecting the entry of cov that belongs to mean.
            self.grad = np.zeros((self.N, 1))
            self.grad[pos] = 1.
        else:
            self.grad = np.array(grad)

        self.value = mean

    def errsq(self):
        """ Return the variance (= square of the error) of the Covobs
        """
        variance = np.dot(np.transpose(self.grad), np.dot(self.cov, self.grad))
        # For a column-vector gradient the product has shape (1, 1). Squeeze it
        # to 0-d first: calling float() on an array with ndim > 0 is deprecated
        # in recent NumPy versions.
        return float(np.squeeze(variance))

View file

@ -6,6 +6,7 @@ from autograd import jacobian
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numdifftools as nd import numdifftools as nd
from itertools import groupby from itertools import groupby
from .covobs import Covobs
class Obs: class Obs:
@ -37,11 +38,11 @@ class Obs:
Dictionary for N_sigma values. If an entry for a given ensemble exists Dictionary for N_sigma values. If an entry for a given ensemble exists
this overwrites the standard value for that ensemble. this overwrites the standard value for that ensemble.
""" """
__slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', #__slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', # 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', # 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', # 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
'idl', 'is_merged', 'tag', '__dict__'] # 'idl', 'is_merged', 'tag', '__dict__']
S_global = 2.0 S_global = 2.0
S_dict = {} S_dict = {}
@ -51,7 +52,7 @@ class Obs:
N_sigma_dict = {} N_sigma_dict = {}
filter_eps = 1e-10 filter_eps = 1e-10
def __init__(self, samples, names, idl=None, means=None, **kwargs): def __init__(self, samples, names, idl=None, means=None, covobs=None, **kwargs):
""" Initialize Obs object. """ Initialize Obs object.
Parameters Parameters
@ -67,7 +68,7 @@ class Obs:
already subtracted from the samples already subtracted from the samples
""" """
if means is None: if means is None and not kwargs.get('empty', False):
if len(samples) != len(names): if len(samples) != len(names):
raise Exception('Length of samples and names incompatible.') raise Exception('Length of samples and names incompatible.')
if idl is not None: if idl is not None:
@ -80,12 +81,21 @@ class Obs:
if min(len(x) for x in samples) <= 4: if min(len(x) for x in samples) <= 4:
raise Exception('Samples have to have at least 5 entries.') raise Exception('Samples have to have at least 5 entries.')
if kwargs.get('empty', False):
self.names = []
else:
self.names = sorted(names) self.names = sorted(names)
self.shape = {} self.shape = {}
self.r_values = {} self.r_values = {}
self.deltas = {} self.deltas = {}
if covobs is None:
self.covobs = {}
else:
self.covobs = covobs
self.idl = {} self.idl = {}
if not kwargs.get('empty', False):
if idl is not None: if idl is not None:
for name, idx in sorted(zip(names, idl)): for name, idx in sorted(zip(names, idl)):
if isinstance(idx, range): if isinstance(idx, range):
@ -126,6 +136,10 @@ class Obs:
for name in self.names: for name in self.names:
self._value += self.shape[name] * self.r_values[name] self._value += self.shape[name] * self.r_values[name]
self._value /= self.N self._value /= self.N
else:
self._value = 0
self.is_merged = {}
self.N = 0
self._dvalue = 0.0 self._dvalue = 0.0
self.ddvalue = 0.0 self.ddvalue = 0.0
@ -220,7 +234,7 @@ class Obs:
_parse_kwarg('N_sigma') _parse_kwarg('N_sigma')
for e, e_name in enumerate(self.e_names): for e, e_name in enumerate(self.e_names):
if e_name not in self.covobs:
r_length = [] r_length = []
for r_name in e_content[e_name]: for r_name in e_content[e_name]:
if isinstance(self.idl[r_name], range): if isinstance(self.idl[r_name], range):
@ -306,6 +320,11 @@ class Obs:
self._dvalue += self.e_dvalue[e_name] ** 2 self._dvalue += self.e_dvalue[e_name] ** 2
self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2
else:
self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
self.e_ddvalue[e_name] = 0
self._dvalue += self.e_dvalue[e_name]**2
self._dvalue = np.sqrt(self.dvalue) self._dvalue = np.sqrt(self.dvalue)
if self._dvalue == 0.0: if self._dvalue == 0.0:
self.ddvalue = 0.0 self.ddvalue = 0.0
@ -367,12 +386,15 @@ class Obs:
if len(self.e_names) > 1: if len(self.e_names) > 1:
print(' Ensemble errors:') print(' Ensemble errors:')
for e_name in self.e_names: for e_name in self.e_names:
if e_name not in self.covobs:
if len(self.e_names) > 1: if len(self.e_names) > 1:
print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
if self.tau_exp[e_name] > 0: if self.tau_exp[e_name] > 0:
print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name]))
else: else:
print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name]))
else:
print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
if ens_content is True: if ens_content is True:
if len(self.e_names) == 1: if len(self.e_names) == 1:
print(self.N, 'samples in', len(self.e_names), 'ensemble:') print(self.N, 'samples in', len(self.e_names), 'ensemble:')
@ -380,6 +402,7 @@ class Obs:
print(self.N, 'samples in', len(self.e_names), 'ensembles:') print(self.N, 'samples in', len(self.e_names), 'ensembles:')
my_string_list = [] my_string_list = []
for key, value in sorted(self.e_content.items()): for key, value in sorted(self.e_content.items()):
if key not in self.covobs:
my_string = ' ' + "\u00B7 Ensemble '" + key + "' " my_string = ' ' + "\u00B7 Ensemble '" + key + "' "
if len(value) == 1: if len(value) == 1:
my_string += f': {self.shape[value[0]]} configurations' my_string += f': {self.shape[value[0]]} configurations'
@ -399,6 +422,8 @@ class Obs:
sublist.append(my_substring) sublist.append(my_substring)
my_string += '\n' + '\n'.join(sublist) my_string += '\n' + '\n'.join(sublist)
else:
my_string = ' ' + "\u00B7 Covobs '" + key + "' "
my_string_list.append(my_string) my_string_list.append(my_string)
print('\n'.join(my_string_list)) print('\n'.join(my_string_list))
@ -1028,6 +1053,15 @@ def derived_observable(func, data, **kwargs):
if isinstance(raveled_data[i], (int, float)): if isinstance(raveled_data[i], (int, float)):
raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl]) raveled_data[i] = Obs([raveled_data[i] + np.zeros(first_shape)], [first_name], idl=[first_idl])
allcov = {}
for o in raveled_data:
for name in o.covobs:
if name in allcov:
if not np.array_equal(allcov[name], o.covobs[name].cov):
raise Exception('Inconsistent covariance matrices for %s!' % (name))
else:
allcov[name] = o.covobs[name].cov
n_obs = len(raveled_data) n_obs = len(raveled_data)
new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
@ -1100,14 +1134,25 @@ def derived_observable(func, data, **kwargs):
for i_val, new_val in np.ndenumerate(new_values): for i_val, new_val in np.ndenumerate(new_values):
new_deltas = {} new_deltas = {}
new_grad = {}
for j_obs, obs in np.ndenumerate(data): for j_obs, obs in np.ndenumerate(data):
for name in obs.names: for name in obs.names:
if name in obs.covobs:
if name in new_grad:
new_grad[name] += deriv[i_val + j_obs] * obs.covobs[name].grad
else:
new_grad[name] = deriv[i_val + j_obs] * obs.covobs[name].grad
else:
new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])
new_covobs = {name: Covobs(obs.covobs[name].value, obs.covobs[name].cov, obs.covobs[name].name, grad=new_grad[name]) for name in new_grad}
new_samples = [] new_samples = []
new_means = [] new_means = []
new_idl = [] new_idl = []
new_names_obs = []
for name in new_names: for name in new_names:
if name not in new_covobs:
if is_merged[name]: if is_merged[name]:
filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name])
else: else:
@ -1117,7 +1162,13 @@ def derived_observable(func, data, **kwargs):
new_samples.append(filtered_deltas) new_samples.append(filtered_deltas)
new_idl.append(filtered_idl_d) new_idl.append(filtered_idl_d)
new_means.append(new_r_values[name][i_val]) new_means.append(new_r_values[name][i_val])
final_result[i_val] = Obs(new_samples, new_names, means=new_means, idl=new_idl) new_names_obs.append(name)
final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
for name in new_covobs:
final_result[i_val].names.append(name)
final_result[i_val].shape[name] = 1
final_result[i_val].idl[name] = []
final_result[i_val].covobs = new_covobs
final_result[i_val]._value = new_val final_result[i_val]._value = new_val
final_result[i_val].is_merged = is_merged final_result[i_val].is_merged = is_merged
final_result[i_val].reweighted = reweighted final_result[i_val].reweighted = reweighted
@ -1603,3 +1654,63 @@ def merge_obs(list_of_obs):
o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names}
o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
return o return o
def covobs_to_obs(co):
    """Wrap a Covobs in an otherwise empty Obs.

    The Obs is created through the ``empty=True`` constructor path, so it
    holds no samples. The Covobs is then registered under its own name,
    with a shape entry of 1 and an empty index list.

    Parameters
    ----------
    co : Covobs
        Covobs to be embedded into the Obs

    Returns
    -------
    Obs
        Obs whose value is ``co.value`` and whose error is the square root
        of ``co.errsq()``.
    """
    label = co.name
    wrapped = Obs(None, None, empty=True)

    # Bookkeeping entries that register the Covobs under its name.
    wrapped.names.append(label)
    wrapped.shape[label] = 1
    wrapped.idl[label] = []
    wrapped.covobs[label] = co

    # Central value and error are taken directly from the Covobs.
    wrapped._value = co.value
    wrapped._dvalue = np.sqrt(co.errsq())
    return wrapped
def create_Covobs(mean, cov, name, pos=None, grad=None):
    """Build a Covobs from the arguments and return it wrapped in an Obs.

    Parameters
    ----------
    mean : float
        Mean value of the new Obs
    cov : list or array
        2d Covariance matrix or 1d diagonal entries
    name : str
        identifier for the covariance matrix
    pos : int
        Position of the variance belonging to mean in cov.
        May be omitted if cov has only a single entry.
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.

    Returns
    -------
    Obs
        Result of passing the new Covobs to covobs_to_obs.
    """
    covobs = Covobs(mean, cov, name, pos=pos, grad=grad)
    return covobs_to_obs(covobs)
def create_Covobs_list(means, cov, name, grad=None):
    """Make a list of Obs based on Covobs that share one covariance matrix.

    The i-th Obs is built from ``means[i]`` with ``pos=i``.

    Parameters
    ----------
    means : list of floats
        N mean values of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix or 1d diagonal entries
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
        Forwarded unchanged to every Covobs.

    Returns
    -------
    list of Obs
        One Obs per entry of means, in the same order.

    Raises
    ------
    Exception
        If the number of mean values does not match the dimension of cov.
    """
    # Check the length first so a mismatch (including an empty list) is
    # reported with the actual dimension of cov rather than as an index error.
    cov_arr = np.asarray(cov)
    n_cov = 1 if cov_arr.ndim == 0 else cov_arr.shape[0]
    if len(means) != n_cov:
        raise Exception('You have to provide %d mean values!' % (n_cov))

    # Further validation of cov (e.g. squareness) is left to Covobs.
    return [covobs_to_obs(Covobs(mean, cov, name, pos=i, grad=grad))
            for i, mean in enumerate(means)]