pyerrors.obs
import warnings
import hashlib
import pickle
from math import gcd
from functools import reduce
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of the same length which
    contains the names of the ensembles/replica. Mathematical operations can
    be performed on instances. The result is another instance of Obs. The
    error of an instance can be computed with the gamma_method. Also contains
    additional methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None
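    # A minimal construction sketch (not part of the original module; the
    # ensemble name 'ens1' and the synthetic samples are illustrative only):
    #
    # >>> import numpy as np
    # >>> import pyerrors as pe
    # >>> samples = np.random.normal(1.0, 0.1, 500)
    # >>> obs = pe.Obs([samples], ['ens1'])                 # default idl: range(1, 501)
    # >>> obs_gap = pe.Obs([samples], ['ens1'],
    # ...                  idl=[range(1, 1000, 2)])         # every 2nd configuration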
    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute makes sure, that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method
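    # A hedged usage sketch (not part of the original module; 'ens1' and obs
    # are illustrative): after gamma_method the error and the integrated
    # autocorrelation time per ensemble become available.
    #
    # >>> obs.gamma_method()              # automatic windowing with S=2.0
    # >>> obs.gamma_method(S=3.0)         # larger S, more conservative window
    # >>> obs.gamma_method(tau_exp=10.0)  # attach an exponential tail to rho
    # >>> print(obs.value, obs.dvalue, obs.e_tauint['ens1'])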
    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Works only properly when the gamma method was run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
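    # A brief sketch (illustrative, not part of the original module): the zero
    # checks compare against the estimated error or an absolute tolerance.
    #
    # >>> diff = obs - obs                     # exactly zero by construction
    # >>> diff.is_zero()                       # True
    # >>> obs.gamma_method()
    # >>> obs.is_zero_within_error(sigma=2)    # |value| <= 2 * dvalue?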
    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if save is not None.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if save is not None.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot piechart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if save is not None.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(self.e_names, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'name' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks
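    # A round-trip sketch (illustrative, not part of the original module): the
    # i-th jackknife sample is (n * mean - x_i) / (n - 1), so exporting and
    # re-importing should reproduce the observable up to O(1/N) effects.
    #
    # >>> jacks = obs.export_jackknife()        # [mean, jack_1, ..., jack_N]
    # >>> obs2 = import_jackknife(jacks, 'ens1')
    # >>> obs2.gamma_method()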
    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __format__(self, format_type):
        my_str = _format_uncertainty(self.value, self._dvalue,
                                     significance=int(float(format_type.replace("+", "").replace("-", ""))))
        for char in ["+", " "]:
            if format_type.startswith(char):
                if my_str[0] != "-":
                    my_str = char + my_str
        return my_str

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
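# A short sketch (illustrative, not part of the original module): CObs wraps
# two Obs as real and imaginary part; arithmetic propagates both.
#
# >>> c1 = CObs(obs_re, obs_im)        # obs_re, obs_im are Obs instances
# >>> c2 = c1 * c1.conjugate()         # |c1|^2 as a CObs
# >>> c2.gamma_method()
# >>> print(c2.real, c2.imag)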
def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
    if dvalue == 0.0:
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
    configurations and where holes are filled by 0.
    If idx is of type range, the deltas are not changed if the idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
    """
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = min([idx.start for idx in idl])
            idstop = max([idx.stop for idx in idl])
            idstep = min([idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set().union(*idl))


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    def _lcm(*args):
        """Returns the lowest common multiple of args.

        From python 3.9 onwards the math library contains an lcm function."""
        return reduce(lambda a, b: a * b // gcd(a, b), args)

    # Use groupby to efficiently check whether all elements of idl are identical
    try:
        g = groupby(idl)
        if next(g, True) and not next(g, False):
            return idl[0]
    except Exception:
        pass

    if np.all([type(idx) is range for idx in idl]):
        if len(set([idx[0] for idx in idl])) == 1:
            idstart = max([idx.start for idx in idl])
            idstop = min([idx.stop for idx in idl])
            idstep = _lcm(*[idx.step for idx in idl])
            return range(idstart, idstop, idstep)

    return sorted(set.intersection(*[set(o) for o in idl]))
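# An illustration of the two index helpers (not part of the original module):
# for ranges with a common start, the union keeps the finest step while the
# intersection keeps the coarsest common one.
#
# >>> _merge_idx([range(1, 101, 2), range(1, 101, 4)])
# range(1, 101, 2)
# >>> _intersection_idx([range(1, 101, 2), range(1, 101, 4)])
# range(1, 101, 4)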
def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
    New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
    common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
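# A hedged sketch of the differentiation paths (obs1, obs2 are illustrative
# Obs instances; anp is autograd.numpy as imported at the top of this module):
#
# >>> ratio = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])   # autograd
# >>> logr = derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2])
# >>> num = derived_observable(lambda x: x[0] / x[1], [obs1, obs2],
# ...                          num_grad=True)                           # numdifftools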
def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes, that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if not len(deltas) == len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range:
        if idx_old == idx_new:
            return deltas
    # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical
    try:
        g = groupby([idx_old, idx_new])
        if next(g, True) and not next(g, False):
            return deltas
    except Exception:
        pass
    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
    if len(indices) < len(idx_new):
        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
    return np.array(deltas)[indices]


def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True

    return result
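# A hedged usage sketch (w and o are illustrative Obs on the same ensemble;
# w holds the reweighting factor per configuration):
#
# >>> o_rw, = reweight(w, [o])                         # <w * O> / <w>
# >>> o_all, = reweight(w, [o], all_configs=True)      # normalize on all of w.idl
# >>> o_rw.reweighted
# True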
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
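# A brief sketch (obs_a, obs_b illustrative, defined on identical idl):
# correlate builds the configuration-wise product <A_i * B_i>, which differs
# from the derived product obs_a * obs_b in how the fluctuations combine.
#
# >>> ab = correlate(obs_a, obs_b)
# >>> ab.gamma_method()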
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T


def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
        gamma /= gamma_div

        dvalue += gamma

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)))

    return dvalue
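# A hedged usage sketch (obs1, obs2 illustrative; gamma_method must be run
# on all inputs first):
#
# >>> obs1.gamma_method(); obs2.gamma_method()
# >>> cov = covariance([obs1, obs2])                     # 2x2 error covariance
# >>> corr = covariance([obs1, obs2], correlation=True)
# >>> cov[0, 1] / (obs1.dvalue * obs2.dvalue)            # corr[0, 1] up to rounding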
1548 """Imports jackknife samples and returns an Obs 1549 1550 Parameters 1551 ---------- 1552 jacks : numpy.ndarray 1553 numpy array containing the mean value as zeroth entry and 1554 the N jackknife samples as first to Nth entry. 1555 name : str 1556 name of the ensemble the samples are defined on. 1557 """ 1558 length = len(jacks) - 1 1559 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1560 samples = jacks[1:] @ prj 1561 mean = np.mean(samples) 1562 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1563 new_obs._value = jacks[0] 1564 return new_obs 1565 1566 1567def merge_obs(list_of_obs): 1568 """Combine all observables in list_of_obs into one new observable 1569 1570 Parameters 1571 ---------- 1572 list_of_obs : list 1573 list of the Obs object to be combined 1574 1575 Notes 1576 ----- 1577 It is not possible to combine obs which are based on the same replicum 1578 """ 1579 replist = [item for obs in list_of_obs for item in obs.names] 1580 if (len(replist) == len(set(replist))) is False: 1581 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1582 if any([len(o.cov_names) for o in list_of_obs]): 1583 raise Exception('Not possible to merge data that contains covobs!') 1584 new_dict = {} 1585 idl_dict = {} 1586 for o in list_of_obs: 1587 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1588 for key in set(o.deltas) | set(o.r_values)}) 1589 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1590 1591 names = sorted(new_dict.keys()) 1592 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1593 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1594 return o 1595 1596 1597def cov_Obs(means, cov, name, grad=None): 1598 """Create an Obs based on mean(s) and a covariance matrix 1599 1600 Parameters 1601 ---------- 1602 mean : list of floats or float 1603 N mean value(s) of the new Obs 1604 cov : list or array 1605 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1606 name : str 1607 identifier for the covariance matrix 1608 grad : list or array 1609 Gradient of the Covobs wrt. the means belonging to cov. 1610 """ 1611 1612 def covobs_to_obs(co): 1613 """Make an Obs out of a Covobs 1614 1615 Parameters 1616 ---------- 1617 co : Covobs 1618 Covobs to be embedded into the Obs 1619 """ 1620 o = Obs([], [], means=[]) 1621 o._value = co.value 1622 o.names.append(co.name) 1623 o._covobs[co.name] = co 1624 o._dvalue = np.sqrt(co.errsq()) 1625 return o 1626 1627 ol = [] 1628 if isinstance(means, (float, int)): 1629 means = [means] 1630 1631 for i in range(len(means)): 1632 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1633 if ol[0].covobs[name].N != len(means): 1634 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1635 if len(ol) == 1: 1636 return ol[0] 1637 return ol 1638 1639 1640def _determine_gap(o, e_content, e_name): 1641 gaps = [] 1642 for r_name in e_content[e_name]: 1643 if isinstance(o.idl[r_name], range): 1644 gaps.append(o.idl[r_name].step) 1645 else: 1646 gaps.append(np.min(np.diff(o.idl[r_name]))) 1647 1648 gap = min(gaps) 1649 if not np.all([gi % gap == 0 for gi in gaps]): 1650 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) 1651 1652 return gap
class Obs:
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. The class also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
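These class-level defaults can be adjusted globally or per ensemble before the error analysis runs. A short sketch; the ensemble name 'ensA' is a placeholder:

import pyerrors as pe

# Entries in the *_dict attributes override the *_global defaults for the
# named ensemble whenever gamma_method is called without explicit kwargs.
pe.Obs.S_global = 2.0
pe.Obs.S_dict['ensA'] = 2.5
pe.Obs.tau_exp_dict['ensA'] = 4.0
pe.Obs.N_sigma_dict['ensA'] = 2.0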
def __init__(self, samples, names, idl=None, **kwargs):
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
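A construction sketch under these rules (toy data and a placeholder ensemble name; each sample array needs at least 5 entries):

import numpy as np
import pyerrors as pe

samples = np.random.default_rng(0).normal(0.0, 1.0, 1000)

# Without idl the samples are assumed to live on configurations 1..N:
obs = pe.Obs([samples], ['ensA'])

# The same data on every second configuration of a longer chain,
# made explicit through idl:
obs_gapped = pe.Obs([samples], ['ensA'], idl=[range(1, 2001, 2)])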
def gamma_method(self, **kwargs):
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
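A sketch of typical calls on a toy observable (parameter values are illustrative only):

import numpy as np
import pyerrors as pe

obs = pe.Obs([np.random.default_rng(0).normal(0.0, 1.0, 1000)], ['ensA'])

obs.gamma_method()                       # automatic windowing with S=2.0
print(obs.value, obs.dvalue)             # mean and statistical error
print(obs.e_tauint['ensA'])              # integrated autocorrelation time

obs.gamma_method(S=0)                    # no autocorrelation: plain standard error
obs.gamma_method(tau_exp=10, N_sigma=1)  # attach an exponential tail to rho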
gm = gamma_method

Alias for gamma_method; see the parameter description above.
def details(self, ens_content=True):
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
def reweight(self, weight, **kwargs):
    """Reweight the obs with given reweighting factors.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    return reweight(weight, [self], **kwargs)[0]
Reweight the obs with given reweighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
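A sketch of the call pattern, assuming my_obs from above and a reweighting factor w (an Obs covering at least the configurations of my_obs):

rw_obs = my_obs.reweight(w)
rw_obs.gamma_method()
print(rw_obs.reweighted)  # True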
def is_zero_within_error(self, sigma=1):
    """Checks whether the observable is zero within 'sigma' standard errors.

    Parameters
    ----------
    sigma : int
        Number of standard errors used for the check.

    Works only properly when the gamma method was run.
    """
    return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
Notes
Works only properly when the gamma method was run.
def is_zero(self, atol=1e-10):
    """Checks whether the observable is zero within a given tolerance.

    Parameters
    ----------
    atol : float
        Absolute tolerance (for details see numpy documentation).
    """
    return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
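For instance, the difference of an observable with itself is identically zero and passes both checks; is_zero() requires no prior gamma_method call:

diff = my_obs - my_obs
print(diff.is_zero())               # True: value and all deltas vanish
print(diff.is_zero_within_error())  # True via the is_zero() short-circuit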
def plot_tauint(self, save=None):
    """Plot integrated autocorrelation time for each ensemble.

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')

    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel(r'$W$')
        plt.ylabel(r'$\tau_\mathrm{int}$')
        length = int(len(self.e_n_tauint[e_name]))
        if self.tau_exp[e_name] > 0:
            base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
            x_help = np.arange(2 * self.tau_exp[e_name])
            y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
            x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
            plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
            plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                         yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
        else:
            label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

        plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
        plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
        plt.legend()
        plt.xlim(-0.5, xmax)
        ylim = plt.ylim()
        plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if set.
def plot_rho(self, save=None):
    """Plot the normalized autocorrelation function for each ensemble.

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e, e_name in enumerate(self.mc_names):
        fig = plt.figure()
        plt.xlabel('W')
        plt.ylabel('rho')
        length = int(len(self.e_drho[e_name]))
        plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
        plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
        if self.tau_exp[e_name] > 0:
            plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                     [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
            xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
            plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
        else:
            xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
            plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
        plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
        plt.xlim(-0.5, xmax)
        plt.draw()
        if save:
            fig.savefig(save + "_" + str(e))
Plot the normalized autocorrelation function for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if set.
def plot_rep_dist(self):
    """Plot replica distribution for each ensemble with more than one replicum."""
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    for e, e_name in enumerate(self.mc_names):
        if len(self.e_content[e_name]) == 1:
            print('No replica distribution for a single replicum (', e_name, ')')
            continue
        r_length = []
        sub_r_mean = 0
        for r, r_name in enumerate(self.e_content[e_name]):
            r_length.append(len(self.deltas[r_name]))
            sub_r_mean += self.shape[r_name] * self.r_values[r_name]
        e_N = np.sum(r_length)
        sub_r_mean /= e_N
        arr = np.zeros(len(self.e_content[e_name]))
        for r, r_name in enumerate(self.e_content[e_name]):
            arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
        plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
        plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
        plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
def plot_history(self, expand=True):
    """Plot derived Monte Carlo history for each ensemble.

    Parameters
    ----------
    expand : bool
        show expanded history for irregular Monte Carlo chains (default: True).
    """
    for e, e_name in enumerate(self.mc_names):
        plt.figure()
        r_length = []
        tmp = []
        tmp_expanded = []
        for r, r_name in enumerate(self.e_content[e_name]):
            tmp.append(self.deltas[r_name] + self.r_values[r_name])
            if expand:
                tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                r_length.append(len(tmp_expanded[-1]))
            else:
                r_length.append(len(tmp[-1]))
        e_N = np.sum(r_length)
        x = np.arange(e_N)
        y_test = np.concatenate(tmp, axis=0)
        if expand:
            y = np.concatenate(tmp_expanded, axis=0)
        else:
            y = y_test
        plt.errorbar(x, y, fmt='.', markersize=3)
        plt.xlim(-0.5, e_N - 0.5)
        plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
        plt.draw()
Plot derived Monte Carlo history for each ensemble.
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
def plot_piechart(self, save=None):
    """Plot piechart which shows the fractional contribution of each
    ensemble to the error and returns a dictionary containing the fractions.

    Parameters
    ----------
    save : str
        saves the figure to a file named 'save' if set.
    """
    if not hasattr(self, 'e_dvalue'):
        raise Exception('Run the gamma method first.')
    if np.isclose(0.0, self._dvalue, atol=1e-15):
        raise Exception('Error is 0.0')
    labels = self.e_names
    sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
    fig1, ax1 = plt.subplots()
    ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
    ax1.axis('equal')
    plt.draw()
    if save:
        fig1.savefig(save)

    return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if set.
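All diagnostic plots require a prior gamma_method call; a sketch (the file prefix 'tauint' is arbitrary):

my_obs.gamma_method()
my_obs.plot_tauint(save='tauint')  # one file per ensemble, suffixed _0, _1, ...
my_obs.plot_rho()
my_obs.plot_history(expand=True)
fractions = my_obs.plot_piechart() # dict: ensemble name -> fraction of the squared error
print(fractions)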
def dump(self, filename, datatype="json.gz", description="", **kwargs):
    """Dump the Obs to a file 'filename' of chosen format.

    Parameters
    ----------
    filename : str
        name of the file to be saved.
    datatype : str
        Format of the exported file. Supported formats include
        "json.gz" and "pickle"
    description : str
        Description for output file, only relevant for json.gz format.
    path : str
        specifies a custom path for the file (default '.')
    """
    if 'path' in kwargs:
        file_name = kwargs.get('path') + '/' + filename
    else:
        file_name = filename

    if datatype == "json.gz":
        from .input.json import dump_to_json
        dump_to_json([self], file_name, description=description)
    elif datatype == "pickle":
        with open(file_name + '.p', 'wb') as fb:
            pickle.dump(self, fb)
    else:
        raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'filename' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
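Both export paths in one sketch (the file names are invented):

my_obs.dump('my_observable')                     # writes my_observable.json.gz
my_obs.dump('my_observable', datatype='pickle')  # writes my_observable.p
my_obs.dump('my_observable', path='results')     # same, inside the directory 'results'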
def export_jackknife(self):
    """Export jackknife samples from the Obs

    Returns
    -------
    numpy.ndarray
        Returns a numpy array of length N + 1 where N is the number of samples
        for the given ensemble and replicum. The zeroth entry of the array contains
        the mean value of the Obs, entries 1 to N contain the N jackknife samples
        derived from the Obs. The current implementation only works for observables
        defined on exactly one ensemble and replicum. The derived jackknife samples
        should agree with samples from a full jackknife analysis up to O(1/N).
    """

    if len(self.names) != 1:
        raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

    name = self.names[0]
    full_data = self.deltas[name] + self.r_values[name]
    n = full_data.size
    mean = self.value
    tmp_jacks = np.zeros(n + 1)
    tmp_jacks[0] = mean
    tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
    return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
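The exported array can be fed into standard jackknife formulas; as a sketch, the naive jackknife error (which neglects autocorrelation) would read:

jacks = my_obs.export_jackknife()
theta_i = jacks[1:]  # the N jackknife samples
n = len(theta_i)
# naive jackknife variance: (n - 1)/n * sum over (theta_i - mean)^2
jack_err = np.sqrt((n - 1) / n * np.sum((theta_i - np.mean(theta_i)) ** 2))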
class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
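A short sketch; obs_re and obs_im are placeholder Obs defined on the same ensemble:

c1 = pe.CObs(obs_re, obs_im)
c1.gamma_method()         # delegates to the real and imaginary part
c2 = c1 * c1.conjugate()  # modulus squared, again a CObs
print(c1)                 # formatted as '(real+imagj)'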
def gamma_method(self, **kwargs):
    """Executes the gamma_method for the real and the imaginary part."""
    if isinstance(self.real, Obs):
        self.real.gamma_method(**kwargs)
    if isinstance(self.imag, Obs):
        self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
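Beyond the lambda above, any autograd-compatible function works; a sketch with the numerical fallback (obs1 and obs2 are placeholders):

import autograd.numpy as anp
import pyerrors as pe

def log_ratio(x, **kwargs):
    return anp.log(x[0] / x[1])  # anp keeps the function differentiable

new_obs = pe.derived_observable(log_ratio, [obs1, obs2])
# the same quantity with numerical derivatives from numdifftools
new_obs_num = pe.derived_observable(log_ratio, [obs1, obs2], num_grad=True)
new_obs.gamma_method()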
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True

    return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
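A sketch of the list version including the all_configs normalization (w, obs1 and obs2 are placeholders on a common ensemble):

rw1, rw2 = pe.reweight(w, [obs1, obs2])
# normalize by the mean reweighting factor over all configurations in w.idl
rw1_all, rw2_all = pe.reweight(w, [obs1, obs2], all_configs=True)
rw1.gamma_method()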
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
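A sketch for two primary observables on identical configurations (placeholders):

ab = pe.correlate(obs_a, obs_b)  # samplewise product <a*b> as a new Obs
ab.gamma_method()
connected = ab - obs_a * obs_b   # e.g. the connected part <ab> - <a><b>
connected.gamma_method()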
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big(\sum_{i}v_i\delta_i^s\Big)^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.

The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big(\sum_{i}v_i\delta_i^s\Big)^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.

For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements:
$$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
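A sketch for two observables (gamma_method must have been run on both):

obs1.gamma_method()
obs2.gamma_method()
cov = pe.covariance([obs1, obs2])                     # 2x2 error covariance
corr = pe.covariance([obs1, obs2], correlation=True)  # normalized variant
# the diagonal reproduces the squared standard errors
assert np.isclose(cov[0, 0], obs1.dvalue ** 2)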
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list containing the range or list on which the samples are defined.
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
- idl (list, optional): list containing the range or list on which the samples are defined.
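A round trip with export_jackknife reproduces the original observable; a sketch using my_obs from above:

jacks = my_obs.export_jackknife()
rec = pe.import_jackknife(jacks, 'ens1', idl=[my_obs.idl['ens1']])
rec.gamma_method()
print(np.isclose(rec.value, my_obs.value))  # True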
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable

    Parameters
    ----------
    list_of_obs : list
        list of the Obs object to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
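A sketch combining two replica of one ensemble; the 'ensemble|replicum' naming convention ties them together (samples_r1 and samples_r2 are placeholder arrays):

obs_r1 = pe.Obs([samples_r1], ['ens1|r1'])
obs_r2 = pe.Obs([samples_r2], ['ens1|r2'])
combined = pe.merge_obs([obs_r1, obs_r2])
combined.gamma_method()
combined.details()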
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- means (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
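A sketch for a single external input and a correlated pair (the numbers and names are invented):

zp = pe.cov_Obs(0.75, 0.02 ** 2, 'Z_P')  # one mean with variance 0.02^2
a, b = pe.cov_Obs([1.0, 2.0], [[0.10, 0.05], [0.05, 0.20]], 'inputs')
zp.gamma_method()
print(zp)  # prints the value with its error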