pyerrors.obs
1import warnings 2import hashlib 3import pickle 4from math import gcd 5from functools import reduce 6import numpy as np 7import autograd.numpy as anp # Thinly-wrapped numpy 8from autograd import jacobian 9import matplotlib.pyplot as plt 10from scipy.stats import skew, skewtest, kurtosis, kurtosistest 11import numdifftools as nd 12from itertools import groupby 13from .covobs import Covobs 14 15# Improve print output of numpy.ndarrays containing Obs objects. 16np.set_printoptions(formatter={'object': lambda x: str(x)}) 17 18 19class Obs: 20 """Class for a general observable. 21 22 Instances of Obs are the basic objects of a pyerrors error analysis. 23 They are initialized with a list which contains arrays of samples for 24 different ensembles/replica and another list of same length which contains 25 the names of the ensembles/replica. Mathematical operations can be 26 performed on instances. The result is another instance of Obs. The error of 27 an instance can be computed with the gamma_method. Also contains additional 28 methods for output and visualization of the error calculation. 29 30 Attributes 31 ---------- 32 S_global : float 33 Standard value for S (default 2.0) 34 S_dict : dict 35 Dictionary for S values. If an entry for a given ensemble 36 exists this overwrites the standard value for that ensemble. 37 tau_exp_global : float 38 Standard value for tau_exp (default 0.0) 39 tau_exp_dict : dict 40 Dictionary for tau_exp values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 N_sigma_global : float 43 Standard value for N_sigma (default 1.0) 44 N_sigma_dict : dict 45 Dictionary for N_sigma values. If an entry for a given ensemble exists 46 this overwrites the standard value for that ensemble. 47 """ 48 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 49 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 50 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 51 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 52 'idl', 'tag', '_covobs', '__dict__'] 53 54 S_global = 2.0 55 S_dict = {} 56 tau_exp_global = 0.0 57 tau_exp_dict = {} 58 N_sigma_global = 1.0 59 N_sigma_dict = {} 60 61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise ValueError('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise ValueError('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise ValueError('Names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 else: 87 if not isinstance(names[0], str): 88 raise TypeError('All names have to be strings.') 89 if min(len(x) for x in samples) <= 4: 90 raise ValueError('Samples have to have at least 5 entries.') 91 92 self.names = sorted(names) 93 self.shape = {} 94 self.r_values = {} 95 self.deltas = {} 96 self._covobs = {} 97 98 self._value = 0 99 self.N = 0 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise ValueError("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise TypeError('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None 141 142 @property 143 def value(self): 144 return self._value 145 146 @property 147 def dvalue(self): 148 return self._dvalue 149 150 @property 151 def e_names(self): 152 return sorted(set([o.split('|')[0] for o in self.names])) 153 154 @property 155 def cov_names(self): 156 return sorted(set([o for o in self.covobs.keys()])) 157 158 @property 159 def mc_names(self): 160 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 161 162 @property 163 def e_content(self): 164 res = {} 165 for e, e_name in enumerate(self.e_names): 166 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 167 if e_name in self.names: 168 res[e_name].append(e_name) 169 return res 170 171 @property 172 def covobs(self): 173 return self._covobs 174 175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 gaps = [] 272 for r_name in e_content[e_name]: 273 if isinstance(self.idl[r_name], range): 274 gaps.append(1) 275 else: 276 gaps.append(np.min(np.diff(self.idl[r_name]))) 277 278 if not np.all([gi == gaps[0] for gi in gaps]): 279 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 280 else: 281 gapsize = gaps[0] 282 283 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 284 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 285 # Make sure no entry of tauint is smaller than 0.5 286 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 287 # hep-lat/0306017 eq. (42) 288 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 289 self.e_n_dtauint[e_name][0] = 0.0 290 291 def _compute_drho(i): 292 tmp = (self.e_rho[e_name][i + 1:w_max] 293 + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1], 294 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 295 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 296 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 297 298 if self.tau_exp[e_name] > 0: 299 _compute_drho(gapsize) 300 texp = self.tau_exp[e_name] 301 # Critical slowing down analysis 302 if w_max // 2 <= 1: 303 raise Exception("Need at least 8 samples for tau_exp error analysis") 304 for n in range(gapsize, w_max // 2, gapsize): 305 _compute_drho(n + gapsize) 306 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 307 # Bias correction hep-lat/0306017 eq. (49) included 308 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 309 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 310 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 311 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 313 self.e_windowsize[e_name] = n 314 break 315 else: 316 if self.S[e_name] == 0.0: 317 self.e_tauint[e_name] = 0.5 318 self.e_dtauint[e_name] = 0.0 319 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 320 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 321 self.e_windowsize[e_name] = 0 322 else: 323 # Standard automatic windowing procedure 324 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 325 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 326 for n in range(1, w_max // gapsize): 327 if g_w[n - 1] < 0 or n >= w_max // gapsize - 1: 328 _compute_drho(gapsize * n) 329 n *= gapsize 330 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 331 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 332 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 333 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 334 self.e_windowsize[e_name] = n 335 break 336 337 self._dvalue += self.e_dvalue[e_name] ** 2 338 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 339 340 for e_name in self.cov_names: 341 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 342 self.e_ddvalue[e_name] = 0 343 self._dvalue += self.e_dvalue[e_name]**2 344 345 self._dvalue = np.sqrt(self._dvalue) 346 if self._dvalue == 0.0: 347 self.ddvalue = 0.0 348 else: 349 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 350 return 351 352 gm = gamma_method 353 354 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 355 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 356 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 357 358 Parameters 359 ---------- 360 deltas : list 361 List of fluctuations 362 idx : list 363 List or range of configurations on which the deltas are defined. 364 shape : int 365 Number of configurations in idx. 366 w_max : int 367 Upper bound for the summation window. 368 fft : bool 369 determines whether the fft algorithm is used for the computation 370 of the autocorrelation function. 371 """ 372 gamma = np.zeros(w_max) 373 deltas = _expand_deltas(deltas, idx, shape) 374 new_shape = len(deltas) 375 if fft: 376 max_gamma = min(new_shape, w_max) 377 # The padding for the fft has to be even 378 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 379 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 380 else: 381 for n in range(w_max): 382 if new_shape - n >= 0: 383 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 384 385 return gamma 386 387 def details(self, ens_content=True): 388 """Output detailed properties of the Obs. 389 390 Parameters 391 ---------- 392 ens_content : bool 393 print details about the ensembles and replica if true. 394 """ 395 if self.tag is not None: 396 print("Description:", self.tag) 397 if not hasattr(self, 'e_dvalue'): 398 print('Result\t %3.8e' % (self.value)) 399 else: 400 if self.value == 0.0: 401 percentage = np.nan 402 else: 403 percentage = np.abs(self._dvalue / self.value) * 100 404 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 405 if len(self.e_names) > 1: 406 print(' Ensemble errors:') 407 e_content = self.e_content 408 for e_name in self.mc_names: 409 if isinstance(self.idl[e_content[e_name][0]], range): 410 gap = self.idl[e_content[e_name][0]].step 411 else: 412 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 413 414 if len(self.e_names) > 1: 415 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 416 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 417 tau_string += f" in units of {gap} config" 418 if gap > 1: 419 tau_string += "s" 420 if self.tau_exp[e_name] > 0: 421 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 422 else: 423 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 424 print(tau_string) 425 for e_name in self.cov_names: 426 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 427 if ens_content is True: 428 if len(self.e_names) == 1: 429 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 430 else: 431 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 432 my_string_list = [] 433 for key, value in sorted(self.e_content.items()): 434 if key not in self.covobs: 435 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 436 if len(value) == 1: 437 my_string += f': {self.shape[value[0]]} configurations' 438 if isinstance(self.idl[value[0]], range): 439 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 440 else: 441 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 442 else: 443 sublist = [] 444 for v in value: 445 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 446 my_substring += f': {self.shape[v]} configurations' 447 if isinstance(self.idl[v], range): 448 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 449 else: 450 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 451 sublist.append(my_substring) 452 453 my_string += '\n' + '\n'.join(sublist) 454 else: 455 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 456 my_string_list.append(my_string) 457 print('\n'.join(my_string_list)) 458 459 def reweight(self, weight): 460 """Reweight the obs with given rewighting factors. 461 462 Parameters 463 ---------- 464 weight : Obs 465 Reweighting factor. An Observable that has to be defined on a superset of the 466 configurations in obs[i].idl for all i. 467 all_configs : bool 468 if True, the reweighted observables are normalized by the average of 469 the reweighting factor on all configurations in weight.idl and not 470 on the configurations in obs[i].idl. Default False. 471 """ 472 return reweight(weight, [self])[0] 473 474 def is_zero_within_error(self, sigma=1): 475 """Checks whether the observable is zero within 'sigma' standard errors. 476 477 Parameters 478 ---------- 479 sigma : int 480 Number of standard errors used for the check. 481 482 Works only properly when the gamma method was run. 483 """ 484 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 485 486 def is_zero(self, atol=1e-10): 487 """Checks whether the observable is zero within a given tolerance. 488 489 Parameters 490 ---------- 491 atol : float 492 Absolute tolerance (for details see numpy documentation). 493 """ 494 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 495 496 def plot_tauint(self, save=None): 497 """Plot integrated autocorrelation time for each ensemble. 498 499 Parameters 500 ---------- 501 save : str 502 saves the figure to a file named 'save' if. 503 """ 504 if not hasattr(self, 'e_dvalue'): 505 raise Exception('Run the gamma method first.') 506 507 for e, e_name in enumerate(self.mc_names): 508 fig = plt.figure() 509 plt.xlabel(r'$W$') 510 plt.ylabel(r'$\tau_\mathrm{int}$') 511 length = int(len(self.e_n_tauint[e_name])) 512 if self.tau_exp[e_name] > 0: 513 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 514 x_help = np.arange(2 * self.tau_exp[e_name]) 515 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 516 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 517 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 518 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 519 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 520 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 521 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 522 else: 523 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 524 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 525 526 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 527 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 528 plt.legend() 529 plt.xlim(-0.5, xmax) 530 ylim = plt.ylim() 531 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 532 plt.draw() 533 if save: 534 fig.savefig(save + "_" + str(e)) 535 536 def plot_rho(self, save=None): 537 """Plot normalized autocorrelation function time for each ensemble. 538 539 Parameters 540 ---------- 541 save : str 542 saves the figure to a file named 'save' if. 543 """ 544 if not hasattr(self, 'e_dvalue'): 545 raise Exception('Run the gamma method first.') 546 for e, e_name in enumerate(self.mc_names): 547 fig = plt.figure() 548 plt.xlabel('W') 549 plt.ylabel('rho') 550 length = int(len(self.e_drho[e_name])) 551 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 552 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 553 if self.tau_exp[e_name] > 0: 554 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 555 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 556 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 557 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 558 else: 559 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 560 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 561 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 562 plt.xlim(-0.5, xmax) 563 plt.draw() 564 if save: 565 fig.savefig(save + "_" + str(e)) 566 567 def plot_rep_dist(self): 568 """Plot replica distribution for each ensemble with more than one replicum.""" 569 if not hasattr(self, 'e_dvalue'): 570 raise Exception('Run the gamma method first.') 571 for e, e_name in enumerate(self.mc_names): 572 if len(self.e_content[e_name]) == 1: 573 print('No replica distribution for a single replicum (', e_name, ')') 574 continue 575 r_length = [] 576 sub_r_mean = 0 577 for r, r_name in enumerate(self.e_content[e_name]): 578 r_length.append(len(self.deltas[r_name])) 579 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 580 e_N = np.sum(r_length) 581 sub_r_mean /= e_N 582 arr = np.zeros(len(self.e_content[e_name])) 583 for r, r_name in enumerate(self.e_content[e_name]): 584 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 585 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 586 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 587 plt.draw() 588 589 def plot_history(self, expand=True): 590 """Plot derived Monte Carlo history for each ensemble 591 592 Parameters 593 ---------- 594 expand : bool 595 show expanded history for irregular Monte Carlo chains (default: True). 596 """ 597 for e, e_name in enumerate(self.mc_names): 598 plt.figure() 599 r_length = [] 600 tmp = [] 601 tmp_expanded = [] 602 for r, r_name in enumerate(self.e_content[e_name]): 603 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 604 if expand: 605 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 606 r_length.append(len(tmp_expanded[-1])) 607 else: 608 r_length.append(len(tmp[-1])) 609 e_N = np.sum(r_length) 610 x = np.arange(e_N) 611 y_test = np.concatenate(tmp, axis=0) 612 if expand: 613 y = np.concatenate(tmp_expanded, axis=0) 614 else: 615 y = y_test 616 plt.errorbar(x, y, fmt='.', markersize=3) 617 plt.xlim(-0.5, e_N - 0.5) 618 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 619 plt.draw() 620 621 def plot_piechart(self, save=None): 622 """Plot piechart which shows the fractional contribution of each 623 ensemble to the error and returns a dictionary containing the fractions. 624 625 Parameters 626 ---------- 627 save : str 628 saves the figure to a file named 'save' if. 629 """ 630 if not hasattr(self, 'e_dvalue'): 631 raise Exception('Run the gamma method first.') 632 if np.isclose(0.0, self._dvalue, atol=1e-15): 633 raise Exception('Error is 0.0') 634 labels = self.e_names 635 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 636 fig1, ax1 = plt.subplots() 637 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 638 ax1.axis('equal') 639 plt.draw() 640 if save: 641 fig1.savefig(save) 642 643 return dict(zip(self.e_names, sizes)) 644 645 def dump(self, filename, datatype="json.gz", description="", **kwargs): 646 """Dump the Obs to a file 'name' of chosen format. 647 648 Parameters 649 ---------- 650 filename : str 651 name of the file to be saved. 652 datatype : str 653 Format of the exported file. Supported formats include 654 "json.gz" and "pickle" 655 description : str 656 Description for output file, only relevant for json.gz format. 657 path : str 658 specifies a custom path for the file (default '.') 659 """ 660 if 'path' in kwargs: 661 file_name = kwargs.get('path') + '/' + filename 662 else: 663 file_name = filename 664 665 if datatype == "json.gz": 666 from .input.json import dump_to_json 667 dump_to_json([self], file_name, description=description) 668 elif datatype == "pickle": 669 with open(file_name + '.p', 'wb') as fb: 670 pickle.dump(self, fb) 671 else: 672 raise Exception("Unknown datatype " + str(datatype)) 673 674 def export_jackknife(self): 675 """Export jackknife samples from the Obs 676 677 Returns 678 ------- 679 numpy.ndarray 680 Returns a numpy array of length N + 1 where N is the number of samples 681 for the given ensemble and replicum. The zeroth entry of the array contains 682 the mean value of the Obs, entries 1 to N contain the N jackknife samples 683 derived from the Obs. The current implementation only works for observables 684 defined on exactly one ensemble and replicum. The derived jackknife samples 685 should agree with samples from a full jackknife analysis up to O(1/N). 686 """ 687 688 if len(self.names) != 1: 689 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 690 691 name = self.names[0] 692 full_data = self.deltas[name] + self.r_values[name] 693 n = full_data.size 694 mean = self.value 695 tmp_jacks = np.zeros(n + 1) 696 tmp_jacks[0] = mean 697 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 698 return tmp_jacks 699 700 def __float__(self): 701 return float(self.value) 702 703 def __repr__(self): 704 return 'Obs[' + str(self) + ']' 705 706 def __str__(self): 707 return _format_uncertainty(self.value, self._dvalue) 708 709 def __hash__(self): 710 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 711 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 712 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 713 hash_tuple += tuple([o.encode() for o in self.names]) 714 m = hashlib.md5() 715 [m.update(o) for o in hash_tuple] 716 return int(m.hexdigest(), 16) & 0xFFFFFFFF 717 718 # Overload comparisons 719 def __lt__(self, other): 720 return self.value < other 721 722 def __le__(self, other): 723 return self.value <= other 724 725 def __gt__(self, other): 726 return self.value > other 727 728 def __ge__(self, other): 729 return self.value >= other 730 731 def __eq__(self, other): 732 return (self - other).is_zero() 733 734 def __ne__(self, other): 735 return not (self - other).is_zero() 736 737 # Overload math operations 738 def __add__(self, y): 739 if isinstance(y, Obs): 740 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 741 else: 742 if isinstance(y, np.ndarray): 743 return np.array([self + o for o in y]) 744 elif y.__class__.__name__ in ['Corr', 'CObs']: 745 return NotImplemented 746 else: 747 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 748 749 def __radd__(self, y): 750 return self + y 751 752 def __mul__(self, y): 753 if isinstance(y, Obs): 754 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 755 else: 756 if isinstance(y, np.ndarray): 757 return np.array([self * o for o in y]) 758 elif isinstance(y, complex): 759 return CObs(self * y.real, self * y.imag) 760 elif y.__class__.__name__ in ['Corr', 'CObs']: 761 return NotImplemented 762 else: 763 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 764 765 def __rmul__(self, y): 766 return self * y 767 768 def __sub__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([self - o for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 778 779 def __rsub__(self, y): 780 return -1 * (self - y) 781 782 def __pos__(self): 783 return self 784 785 def __neg__(self): 786 return -1 * self 787 788 def __truediv__(self, y): 789 if isinstance(y, Obs): 790 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 791 else: 792 if isinstance(y, np.ndarray): 793 return np.array([self / o for o in y]) 794 elif y.__class__.__name__ in ['Corr', 'CObs']: 795 return NotImplemented 796 else: 797 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 798 799 def __rtruediv__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([o / self for o in y]) 805 elif y.__class__.__name__ in ['Corr', 'CObs']: 806 return NotImplemented 807 else: 808 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 809 810 def __pow__(self, y): 811 if isinstance(y, Obs): 812 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 813 else: 814 return derived_observable(lambda x: x[0] ** y, [self]) 815 816 def __rpow__(self, y): 817 if isinstance(y, Obs): 818 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 819 else: 820 return derived_observable(lambda x: y ** x[0], [self]) 821 822 def __abs__(self): 823 return derived_observable(lambda x: anp.abs(x[0]), [self]) 824 825 # Overload numpy functions 826 def sqrt(self): 827 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 828 829 def log(self): 830 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 831 832 def exp(self): 833 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 834 835 def sin(self): 836 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 837 838 def cos(self): 839 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 840 841 def tan(self): 842 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 843 844 def arcsin(self): 845 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 846 847 def arccos(self): 848 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 849 850 def arctan(self): 851 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 852 853 def sinh(self): 854 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 855 856 def cosh(self): 857 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 858 859 def tanh(self): 860 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 861 862 def arcsinh(self): 863 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 864 865 def arccosh(self): 866 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 867 868 def arctanh(self): 869 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 870 871 872class CObs: 873 """Class for a complex valued observable.""" 874 __slots__ = ['_real', '_imag', 'tag'] 875 876 def __init__(self, real, imag=0.0): 877 self._real = real 878 self._imag = imag 879 self.tag = None 880 881 @property 882 def real(self): 883 return self._real 884 885 @property 886 def imag(self): 887 return self._imag 888 889 def gamma_method(self, **kwargs): 890 """Executes the gamma_method for the real and the imaginary part.""" 891 if isinstance(self.real, Obs): 892 self.real.gamma_method(**kwargs) 893 if isinstance(self.imag, Obs): 894 self.imag.gamma_method(**kwargs) 895 896 def is_zero(self): 897 """Checks whether both real and imaginary part are zero within machine precision.""" 898 return self.real == 0.0 and self.imag == 0.0 899 900 def conjugate(self): 901 return CObs(self.real, -self.imag) 902 903 def __add__(self, other): 904 if isinstance(other, np.ndarray): 905 return other + self 906 elif hasattr(other, 'real') and hasattr(other, 'imag'): 907 return CObs(self.real + other.real, 908 self.imag + other.imag) 909 else: 910 return CObs(self.real + other, self.imag) 911 912 def __radd__(self, y): 913 return self + y 914 915 def __sub__(self, other): 916 if isinstance(other, np.ndarray): 917 return -1 * (other - self) 918 elif hasattr(other, 'real') and hasattr(other, 'imag'): 919 return CObs(self.real - other.real, self.imag - other.imag) 920 else: 921 return CObs(self.real - other, self.imag) 922 923 def __rsub__(self, other): 924 return -1 * (self - other) 925 926 def __mul__(self, other): 927 if isinstance(other, np.ndarray): 928 return other * self 929 elif hasattr(other, 'real') and hasattr(other, 'imag'): 930 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 931 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 932 [self.real, other.real, self.imag, other.imag], 933 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 934 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 935 [self.real, other.real, self.imag, other.imag], 936 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 937 elif getattr(other, 'imag', 0) != 0: 938 return CObs(self.real * other.real - self.imag * other.imag, 939 self.imag * other.real + self.real * other.imag) 940 else: 941 return CObs(self.real * other.real, self.imag * other.real) 942 else: 943 return CObs(self.real * other, self.imag * other) 944 945 def __rmul__(self, other): 946 return self * other 947 948 def __truediv__(self, other): 949 if isinstance(other, np.ndarray): 950 return 1 / (other / self) 951 elif hasattr(other, 'real') and hasattr(other, 'imag'): 952 r = other.real ** 2 + other.imag ** 2 953 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 954 else: 955 return CObs(self.real / other, self.imag / other) 956 957 def __rtruediv__(self, other): 958 r = self.real ** 2 + self.imag ** 2 959 if hasattr(other, 'real') and hasattr(other, 'imag'): 960 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 961 else: 962 return CObs(self.real * other / r, -self.imag * other / r) 963 964 def __abs__(self): 965 return np.sqrt(self.real**2 + self.imag**2) 966 967 def __pos__(self): 968 return self 969 970 def __neg__(self): 971 return -1 * self 972 973 def __eq__(self, other): 974 return self.real == other.real and self.imag == other.imag 975 976 def __str__(self): 977 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 978 979 def __repr__(self): 980 return 'CObs[' + str(self) + ']' 981 982 983def _format_uncertainty(value, dvalue): 984 """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" 985 if dvalue == 0.0: 986 return str(value) 987 fexp = np.floor(np.log10(dvalue)) 988 if fexp < 0.0: 989 return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 990 elif fexp == 0.0: 991 return '{:.1f}({:1.1f})'.format(value, dvalue) 992 else: 993 return '{:.0f}({:2.0f})'.format(value, dvalue) 994 995 996def _expand_deltas(deltas, idx, shape): 997 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 998 If idx is of type range, the deltas are not changed 999 1000 Parameters 1001 ---------- 1002 deltas : list 1003 List of fluctuations 1004 idx : list 1005 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 1006 shape : int 1007 Number of configs in idx. 1008 """ 1009 if isinstance(idx, range): 1010 return deltas 1011 else: 1012 ret = np.zeros(idx[-1] - idx[0] + 1) 1013 for i in range(shape): 1014 ret[idx[i] - idx[0]] = deltas[i] 1015 return ret 1016 1017 1018def _merge_idx(idl): 1019 """Returns the union of all lists in idl as sorted list 1020 1021 Parameters 1022 ---------- 1023 idl : list 1024 List of lists or ranges. 1025 """ 1026 1027 # Use groupby to efficiently check whether all elements of idl are identical 1028 try: 1029 g = groupby(idl) 1030 if next(g, True) and not next(g, False): 1031 return idl[0] 1032 except Exception: 1033 pass 1034 1035 if np.all([type(idx) is range for idx in idl]): 1036 if len(set([idx[0] for idx in idl])) == 1: 1037 idstart = min([idx.start for idx in idl]) 1038 idstop = max([idx.stop for idx in idl]) 1039 idstep = min([idx.step for idx in idl]) 1040 return range(idstart, idstop, idstep) 1041 1042 return sorted(set().union(*idl)) 1043 1044 1045def _intersection_idx(idl): 1046 """Returns the intersection of all lists in idl as sorted list 1047 1048 Parameters 1049 ---------- 1050 idl : list 1051 List of lists or ranges. 1052 """ 1053 1054 def _lcm(*args): 1055 """Returns the lowest common multiple of args. 1056 1057 From python 3.9 onwards the math library contains an lcm function.""" 1058 return reduce(lambda a, b: a * b // gcd(a, b), args) 1059 1060 # Use groupby to efficiently check whether all elements of idl are identical 1061 try: 1062 g = groupby(idl) 1063 if next(g, True) and not next(g, False): 1064 return idl[0] 1065 except Exception: 1066 pass 1067 1068 if np.all([type(idx) is range for idx in idl]): 1069 if len(set([idx[0] for idx in idl])) == 1: 1070 idstart = max([idx.start for idx in idl]) 1071 idstop = min([idx.stop for idx in idl]) 1072 idstep = _lcm(*[idx.step for idx in idl]) 1073 return range(idstart, idstop, idstep) 1074 1075 return sorted(set.intersection(*[set(o) for o in idl])) 1076 1077 1078def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1079 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1080 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1081 common divisor of the step sizes is used as new step size. 1082 1083 Parameters 1084 ---------- 1085 deltas : list 1086 List of fluctuations 1087 idx : list 1088 List or range of configs on which the deltas are defined. 1089 Has to be a subset of new_idx and has to be sorted in ascending order. 1090 shape : list 1091 Number of configs in idx. 1092 new_idx : list 1093 List of configs that defines the new range, has to be sorted in ascending order. 1094 """ 1095 1096 if type(idx) is range and type(new_idx) is range: 1097 if idx == new_idx: 1098 return deltas 1099 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1100 for i in range(shape): 1101 ret[idx[i] - new_idx[0]] = deltas[i] 1102 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) 1103 1104 1105def derived_observable(func, data, array_mode=False, **kwargs): 1106 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1107 1108 Parameters 1109 ---------- 1110 func : object 1111 arbitrary function of the form func(data, **kwargs). For the 1112 automatic differentiation to work, all numpy functions have to have 1113 the autograd wrapper (use 'import autograd.numpy as anp'). 1114 data : list 1115 list of Obs, e.g. [obs1, obs2, obs3]. 1116 num_grad : bool 1117 if True, numerical derivatives are used instead of autograd 1118 (default False). To control the numerical differentiation the 1119 kwargs of numdifftools.step_generators.MaxStepGenerator 1120 can be used. 1121 man_grad : list 1122 manually supply a list or an array which contains the jacobian 1123 of func. Use cautiously, supplying the wrong derivative will 1124 not be intercepted. 1125 1126 Notes 1127 ----- 1128 For simple mathematical operations it can be practical to use anonymous 1129 functions. For the ratio of two observables one can e.g. use 1130 1131 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1132 """ 1133 1134 data = np.asarray(data) 1135 raveled_data = data.ravel() 1136 1137 # Workaround for matrix operations containing non Obs data 1138 if not all(isinstance(x, Obs) for x in raveled_data): 1139 for i in range(len(raveled_data)): 1140 if isinstance(raveled_data[i], (int, float)): 1141 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1142 1143 allcov = {} 1144 for o in raveled_data: 1145 for name in o.cov_names: 1146 if name in allcov: 1147 if not np.allclose(allcov[name], o.covobs[name].cov): 1148 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1149 else: 1150 allcov[name] = o.covobs[name].cov 1151 1152 n_obs = len(raveled_data) 1153 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1154 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1155 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1156 1157 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1158 1159 if data.ndim == 1: 1160 values = np.array([o.value for o in data]) 1161 else: 1162 values = np.vectorize(lambda x: x.value)(data) 1163 1164 new_values = func(values, **kwargs) 1165 1166 multi = int(isinstance(new_values, np.ndarray)) 1167 1168 new_r_values = {} 1169 new_idl_d = {} 1170 for name in new_sample_names: 1171 idl = [] 1172 tmp_values = np.zeros(n_obs) 1173 for i, item in enumerate(raveled_data): 1174 tmp_values[i] = item.r_values.get(name, item.value) 1175 tmp_idl = item.idl.get(name) 1176 if tmp_idl is not None: 1177 idl.append(tmp_idl) 1178 if multi > 0: 1179 tmp_values = np.array(tmp_values).reshape(data.shape) 1180 new_r_values[name] = func(tmp_values, **kwargs) 1181 new_idl_d[name] = _merge_idx(idl) 1182 1183 if 'man_grad' in kwargs: 1184 deriv = np.asarray(kwargs.get('man_grad')) 1185 if new_values.shape + data.shape != deriv.shape: 1186 raise Exception('Manual derivative does not have correct shape.') 1187 elif kwargs.get('num_grad') is True: 1188 if multi > 0: 1189 raise Exception('Multi mode currently not supported for numerical derivative') 1190 options = { 1191 'base_step': 0.1, 1192 'step_ratio': 2.5} 1193 for key in options.keys(): 1194 kwarg = kwargs.get(key) 1195 if kwarg is not None: 1196 options[key] = kwarg 1197 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1198 if tmp_df.size == 1: 1199 deriv = np.array([tmp_df.real]) 1200 else: 1201 deriv = tmp_df.real 1202 else: 1203 deriv = jacobian(func)(values, **kwargs) 1204 1205 final_result = np.zeros(new_values.shape, dtype=object) 1206 1207 if array_mode is True: 1208 1209 class _Zero_grad(): 1210 def __init__(self, N): 1211 self.grad = np.zeros((N, 1)) 1212 1213 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1214 d_extracted = {} 1215 g_extracted = {} 1216 for name in new_sample_names: 1217 d_extracted[name] = [] 1218 ens_length = len(new_idl_d[name]) 1219 for i_dat, dat in enumerate(data): 1220 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1221 for name in new_cov_names: 1222 g_extracted[name] = [] 1223 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1224 for i_dat, dat in enumerate(data): 1225 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1226 1227 for i_val, new_val in np.ndenumerate(new_values): 1228 new_deltas = {} 1229 new_grad = {} 1230 if array_mode is True: 1231 for name in new_sample_names: 1232 ens_length = d_extracted[name][0].shape[-1] 1233 new_deltas[name] = np.zeros(ens_length) 1234 for i_dat, dat in enumerate(d_extracted[name]): 1235 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1236 for name in new_cov_names: 1237 new_grad[name] = 0 1238 for i_dat, dat in enumerate(g_extracted[name]): 1239 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1240 else: 1241 for j_obs, obs in np.ndenumerate(data): 1242 for name in obs.names: 1243 if name in obs.cov_names: 1244 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1245 else: 1246 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1247 1248 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1249 1250 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1251 raise Exception('The same name has been used for deltas and covobs!') 1252 new_samples = [] 1253 new_means = [] 1254 new_idl = [] 1255 new_names_obs = [] 1256 for name in new_names: 1257 if name not in new_covobs: 1258 new_samples.append(new_deltas[name]) 1259 new_idl.append(new_idl_d[name]) 1260 new_means.append(new_r_values[name][i_val]) 1261 new_names_obs.append(name) 1262 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1263 for name in new_covobs: 1264 final_result[i_val].names.append(name) 1265 final_result[i_val]._covobs = new_covobs 1266 final_result[i_val]._value = new_val 1267 final_result[i_val].reweighted = reweighted 1268 1269 if multi == 0: 1270 final_result = final_result.item() 1271 1272 return final_result 1273 1274 1275def _reduce_deltas(deltas, idx_old, idx_new): 1276 """Extract deltas defined on idx_old on all configs of idx_new. 1277 1278 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1279 are ordered in an ascending order. 1280 1281 Parameters 1282 ---------- 1283 deltas : list 1284 List of fluctuations 1285 idx_old : list 1286 List or range of configs on which the deltas are defined 1287 idx_new : list 1288 List of configs for which we want to extract the deltas. 1289 Has to be a subset of idx_old. 1290 """ 1291 if not len(deltas) == len(idx_old): 1292 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1293 if type(idx_old) is range and type(idx_new) is range: 1294 if idx_old == idx_new: 1295 return deltas 1296 # Use groupby to efficiently check whether all elements of idx_old and idx_new are identical 1297 try: 1298 g = groupby([idx_old, idx_new]) 1299 if next(g, True) and not next(g, False): 1300 return deltas 1301 except Exception: 1302 pass 1303 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1304 if len(indices) < len(idx_new): 1305 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') 1306 return np.array(deltas)[indices] 1307 1308 1309def reweight(weight, obs, **kwargs): 1310 """Reweight a list of observables. 1311 1312 Parameters 1313 ---------- 1314 weight : Obs 1315 Reweighting factor. An Observable that has to be defined on a superset of the 1316 configurations in obs[i].idl for all i. 1317 obs : list 1318 list of Obs, e.g. [obs1, obs2, obs3]. 1319 all_configs : bool 1320 if True, the reweighted observables are normalized by the average of 1321 the reweighting factor on all configurations in weight.idl and not 1322 on the configurations in obs[i].idl. Default False. 1323 """ 1324 result = [] 1325 for i in range(len(obs)): 1326 if len(obs[i].cov_names): 1327 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1328 if not set(obs[i].names).issubset(weight.names): 1329 raise Exception('Error: Ensembles do not fit') 1330 for name in obs[i].names: 1331 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1332 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1333 new_samples = [] 1334 w_deltas = {} 1335 for name in sorted(obs[i].names): 1336 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1337 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1338 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1339 1340 if kwargs.get('all_configs'): 1341 new_weight = weight 1342 else: 1343 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1344 1345 result.append(tmp_obs / new_weight) 1346 result[-1].reweighted = True 1347 1348 return result 1349 1350 1351def correlate(obs_a, obs_b): 1352 """Correlate two observables. 1353 1354 Parameters 1355 ---------- 1356 obs_a : Obs 1357 First observable 1358 obs_b : Obs 1359 Second observable 1360 1361 Notes 1362 ----- 1363 Keep in mind to only correlate primary observables which have not been reweighted 1364 yet. The reweighting has to be applied after correlating the observables. 1365 Currently only works if ensembles are identical (this is not strictly necessary). 1366 """ 1367 1368 if sorted(obs_a.names) != sorted(obs_b.names): 1369 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1370 if len(obs_a.cov_names) or len(obs_b.cov_names): 1371 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1372 for name in obs_a.names: 1373 if obs_a.shape[name] != obs_b.shape[name]: 1374 raise Exception('Shapes of ensemble', name, 'do not fit') 1375 if obs_a.idl[name] != obs_b.idl[name]: 1376 raise Exception('idl of ensemble', name, 'do not fit') 1377 1378 if obs_a.reweighted is True: 1379 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1380 if obs_b.reweighted is True: 1381 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1382 1383 new_samples = [] 1384 new_idl = [] 1385 for name in sorted(obs_a.names): 1386 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1387 new_idl.append(obs_a.idl[name]) 1388 1389 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1390 o.reweighted = obs_a.reweighted or obs_b.reweighted 1391 return o 1392 1393 1394def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1395 r'''Calculates the error covariance matrix of a set of observables. 1396 1397 WARNING: This function should be used with care, especially for observables with support on multiple 1398 ensembles with differing autocorrelations. See the notes below for details. 1399 1400 The gamma method has to be applied first to all observables. 1401 1402 Parameters 1403 ---------- 1404 obs : list or numpy.ndarray 1405 List or one dimensional array of Obs 1406 visualize : bool 1407 If True plots the corresponding normalized correlation matrix (default False). 1408 correlation : bool 1409 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1410 smooth : None or int 1411 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1412 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1413 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1414 small ones. 1415 1416 Notes 1417 ----- 1418 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1419 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1420 in the absence of autocorrelation. 1421 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1422 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1423 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1424 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1425 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1426 ''' 1427 1428 length = len(obs) 1429 1430 max_samples = np.max([o.N for o in obs]) 1431 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1432 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1433 1434 cov = np.zeros((length, length)) 1435 for i in range(length): 1436 for j in range(i, length): 1437 cov[i, j] = _covariance_element(obs[i], obs[j]) 1438 cov = cov + cov.T - np.diag(np.diag(cov)) 1439 1440 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1441 1442 if isinstance(smooth, int): 1443 corr = _smooth_eigenvalues(corr, smooth) 1444 1445 if visualize: 1446 plt.matshow(corr, vmin=-1, vmax=1) 1447 plt.set_cmap('RdBu') 1448 plt.colorbar() 1449 plt.draw() 1450 1451 if correlation is True: 1452 return corr 1453 1454 errors = [o.dvalue for o in obs] 1455 cov = np.diag(errors) @ corr @ np.diag(errors) 1456 1457 eigenvalues = np.linalg.eigh(cov)[0] 1458 if not np.all(eigenvalues >= 0): 1459 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1460 1461 return cov 1462 1463 1464def _smooth_eigenvalues(corr, E): 1465 """Eigenvalue smoothing as described in hep-lat/9412087 1466 1467 corr : np.ndarray 1468 correlation matrix 1469 E : integer 1470 Number of eigenvalues to be left substantially unchanged 1471 """ 1472 if not (2 < E < corr.shape[0] - 1): 1473 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1474 vals, vec = np.linalg.eigh(corr) 1475 lambda_min = np.mean(vals[:-E]) 1476 vals[vals < lambda_min] = lambda_min 1477 vals /= np.mean(vals) 1478 return vec @ np.diag(vals) @ vec.T 1479 1480 1481def _covariance_element(obs1, obs2): 1482 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1483 1484 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1485 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) 1486 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) 1487 return np.sum(deltas1 * deltas2) 1488 1489 if set(obs1.names).isdisjoint(set(obs2.names)): 1490 return 0.0 1491 1492 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1493 raise Exception('The gamma method has to be applied to both Obs first.') 1494 1495 dvalue = 0.0 1496 1497 for e_name in obs1.mc_names: 1498 1499 if e_name not in obs2.mc_names: 1500 continue 1501 1502 idl_d = {} 1503 for r_name in obs1.e_content[e_name]: 1504 if r_name not in obs2.e_content[e_name]: 1505 continue 1506 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1507 1508 gamma = 0.0 1509 1510 for r_name in obs1.e_content[e_name]: 1511 if r_name not in obs2.e_content[e_name]: 1512 continue 1513 if len(idl_d[r_name]) == 0: 1514 continue 1515 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1516 1517 if gamma == 0.0: 1518 continue 1519 1520 gamma_div = 0.0 1521 for r_name in obs1.e_content[e_name]: 1522 if r_name not in obs2.e_content[e_name]: 1523 continue 1524 if len(idl_d[r_name]) == 0: 1525 continue 1526 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1527 gamma /= gamma_div 1528 1529 dvalue += gamma 1530 1531 for e_name in obs1.cov_names: 1532 1533 if e_name not in obs2.cov_names: 1534 continue 1535 1536 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1537 1538 return dvalue 1539 1540 1541def import_jackknife(jacks, name, idl=None): 1542 """Imports jackknife samples and returns an Obs 1543 1544 Parameters 1545 ---------- 1546 jacks : numpy.ndarray 1547 numpy array containing the mean value as zeroth entry and 1548 the N jackknife samples as first to Nth entry. 1549 name : str 1550 name of the ensemble the samples are defined on. 1551 """ 1552 length = len(jacks) - 1 1553 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1554 samples = jacks[1:] @ prj 1555 mean = np.mean(samples) 1556 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1557 new_obs._value = jacks[0] 1558 return new_obs 1559 1560 1561def merge_obs(list_of_obs): 1562 """Combine all observables in list_of_obs into one new observable 1563 1564 Parameters 1565 ---------- 1566 list_of_obs : list 1567 list of the Obs object to be combined 1568 1569 Notes 1570 ----- 1571 It is not possible to combine obs which are based on the same replicum 1572 """ 1573 replist = [item for obs in list_of_obs for item in obs.names] 1574 if (len(replist) == len(set(replist))) is False: 1575 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1576 if any([len(o.cov_names) for o in list_of_obs]): 1577 raise Exception('Not possible to merge data that contains covobs!') 1578 new_dict = {} 1579 idl_dict = {} 1580 for o in list_of_obs: 1581 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1582 for key in set(o.deltas) | set(o.r_values)}) 1583 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1584 1585 names = sorted(new_dict.keys()) 1586 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1587 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1588 return o 1589 1590 1591def cov_Obs(means, cov, name, grad=None): 1592 """Create an Obs based on mean(s) and a covariance matrix 1593 1594 Parameters 1595 ---------- 1596 mean : list of floats or float 1597 N mean value(s) of the new Obs 1598 cov : list or array 1599 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1600 name : str 1601 identifier for the covariance matrix 1602 grad : list or array 1603 Gradient of the Covobs wrt. the means belonging to cov. 1604 """ 1605 1606 def covobs_to_obs(co): 1607 """Make an Obs out of a Covobs 1608 1609 Parameters 1610 ---------- 1611 co : Covobs 1612 Covobs to be embedded into the Obs 1613 """ 1614 o = Obs([], [], means=[]) 1615 o._value = co.value 1616 o.names.append(co.name) 1617 o._covobs[co.name] = co 1618 o._dvalue = np.sqrt(co.errsq()) 1619 return o 1620 1621 ol = [] 1622 if isinstance(means, (float, int)): 1623 means = [means] 1624 1625 for i in range(len(means)): 1626 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1627 if ol[0].covobs[name].N != len(means): 1628 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1629 if len(ol) == 1: 1630 return ol[0] 1631 return ol
20class Obs: 21 """Class for a general observable. 22 23 Instances of Obs are the basic objects of a pyerrors error analysis. 24 They are initialized with a list which contains arrays of samples for 25 different ensembles/replica and another list of same length which contains 26 the names of the ensembles/replica. Mathematical operations can be 27 performed on instances. The result is another instance of Obs. The error of 28 an instance can be computed with the gamma_method. Also contains additional 29 methods for output and visualization of the error calculation. 30 31 Attributes 32 ---------- 33 S_global : float 34 Standard value for S (default 2.0) 35 S_dict : dict 36 Dictionary for S values. If an entry for a given ensemble 37 exists this overwrites the standard value for that ensemble. 38 tau_exp_global : float 39 Standard value for tau_exp (default 0.0) 40 tau_exp_dict : dict 41 Dictionary for tau_exp values. If an entry for a given ensemble exists 42 this overwrites the standard value for that ensemble. 43 N_sigma_global : float 44 Standard value for N_sigma (default 1.0) 45 N_sigma_dict : dict 46 Dictionary for N_sigma values. If an entry for a given ensemble exists 47 this overwrites the standard value for that ensemble. 48 """ 49 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 50 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 51 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 52 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 53 'idl', 'tag', '_covobs', '__dict__'] 54 55 S_global = 2.0 56 S_dict = {} 57 tau_exp_global = 0.0 58 tau_exp_dict = {} 59 N_sigma_global = 1.0 60 N_sigma_dict = {} 61 62 def __init__(self, samples, names, idl=None, **kwargs): 63 """ Initialize Obs object. 64 65 Parameters 66 ---------- 67 samples : list 68 list of numpy arrays containing the Monte Carlo samples 69 names : list 70 list of strings labeling the individual samples 71 idl : list, optional 72 list of ranges or lists on which the samples are defined 73 """ 74 75 if kwargs.get("means") is None and len(samples): 76 if len(samples) != len(names): 77 raise ValueError('Length of samples and names incompatible.') 78 if idl is not None: 79 if len(idl) != len(names): 80 raise ValueError('Length of idl incompatible with samples and names.') 81 name_length = len(names) 82 if name_length > 1: 83 if name_length != len(set(names)): 84 raise ValueError('Names are not unique.') 85 if not all(isinstance(x, str) for x in names): 86 raise TypeError('All names have to be strings.') 87 else: 88 if not isinstance(names[0], str): 89 raise TypeError('All names have to be strings.') 90 if min(len(x) for x in samples) <= 4: 91 raise ValueError('Samples have to have at least 5 entries.') 92 93 self.names = sorted(names) 94 self.shape = {} 95 self.r_values = {} 96 self.deltas = {} 97 self._covobs = {} 98 99 self._value = 0 100 self.N = 0 101 self.idl = {} 102 if idl is not None: 103 for name, idx in sorted(zip(names, idl)): 104 if isinstance(idx, range): 105 self.idl[name] = idx 106 elif isinstance(idx, (list, np.ndarray)): 107 dc = np.unique(np.diff(idx)) 108 if np.any(dc < 0): 109 raise ValueError("Unsorted idx for idl[%s]" % (name)) 110 if len(dc) == 1: 111 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 112 else: 113 self.idl[name] = list(idx) 114 else: 115 raise TypeError('incompatible type for idl[%s].' % (name)) 116 else: 117 for name, sample in sorted(zip(names, samples)): 118 self.idl[name] = range(1, len(sample) + 1) 119 120 if kwargs.get("means") is not None: 121 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 122 self.shape[name] = len(self.idl[name]) 123 self.N += self.shape[name] 124 self.r_values[name] = mean 125 self.deltas[name] = sample 126 else: 127 for name, sample in sorted(zip(names, samples)): 128 self.shape[name] = len(self.idl[name]) 129 self.N += self.shape[name] 130 if len(sample) != self.shape[name]: 131 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 132 self.r_values[name] = np.mean(sample) 133 self.deltas[name] = sample - self.r_values[name] 134 self._value += self.shape[name] * self.r_values[name] 135 self._value /= self.N 136 137 self._dvalue = 0.0 138 self.ddvalue = 0.0 139 self.reweighted = False 140 141 self.tag = None 142 143 @property 144 def value(self): 145 return self._value 146 147 @property 148 def dvalue(self): 149 return self._dvalue 150 151 @property 152 def e_names(self): 153 return sorted(set([o.split('|')[0] for o in self.names])) 154 155 @property 156 def cov_names(self): 157 return sorted(set([o for o in self.covobs.keys()])) 158 159 @property 160 def mc_names(self): 161 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 162 163 @property 164 def e_content(self): 165 res = {} 166 for e, e_name in enumerate(self.e_names): 167 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 168 if e_name in self.names: 169 res[e_name].append(e_name) 170 return res 171 172 @property 173 def covobs(self): 174 return self._covobs 175 176 def gamma_method(self, **kwargs): 177 """Estimate the error and related properties of the Obs. 178 179 Parameters 180 ---------- 181 S : float 182 specifies a custom value for the parameter S (default 2.0). 183 If set to 0 it is assumed that the data exhibits no 184 autocorrelation. In this case the error estimates coincides 185 with the sample standard error. 186 tau_exp : float 187 positive value triggers the critical slowing down analysis 188 (default 0.0). 189 N_sigma : float 190 number of standard deviations from zero until the tail is 191 attached to the autocorrelation function (default 1). 192 fft : bool 193 determines whether the fft algorithm is used for the computation 194 of the autocorrelation function (default True) 195 """ 196 197 e_content = self.e_content 198 self.e_dvalue = {} 199 self.e_ddvalue = {} 200 self.e_tauint = {} 201 self.e_dtauint = {} 202 self.e_windowsize = {} 203 self.e_n_tauint = {} 204 self.e_n_dtauint = {} 205 e_gamma = {} 206 self.e_rho = {} 207 self.e_drho = {} 208 self._dvalue = 0 209 self.ddvalue = 0 210 211 self.S = {} 212 self.tau_exp = {} 213 self.N_sigma = {} 214 215 if kwargs.get('fft') is False: 216 fft = False 217 else: 218 fft = True 219 220 def _parse_kwarg(kwarg_name): 221 if kwarg_name in kwargs: 222 tmp = kwargs.get(kwarg_name) 223 if isinstance(tmp, (int, float)): 224 if tmp < 0: 225 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 226 for e, e_name in enumerate(self.e_names): 227 getattr(self, kwarg_name)[e_name] = tmp 228 else: 229 raise TypeError(kwarg_name + ' is not in proper format.') 230 else: 231 for e, e_name in enumerate(self.e_names): 232 if e_name in getattr(Obs, kwarg_name + '_dict'): 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 234 else: 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 236 237 _parse_kwarg('S') 238 _parse_kwarg('tau_exp') 239 _parse_kwarg('N_sigma') 240 241 for e, e_name in enumerate(self.mc_names): 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name])) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 gaps = [] 273 for r_name in e_content[e_name]: 274 if isinstance(self.idl[r_name], range): 275 gaps.append(1) 276 else: 277 gaps.append(np.min(np.diff(self.idl[r_name]))) 278 279 if not np.all([gi == gaps[0] for gi in gaps]): 280 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 281 else: 282 gapsize = gaps[0] 283 284 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 285 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 286 # Make sure no entry of tauint is smaller than 0.5 287 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 288 # hep-lat/0306017 eq. (42) 289 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 290 self.e_n_dtauint[e_name][0] = 0.0 291 292 def _compute_drho(i): 293 tmp = (self.e_rho[e_name][i + 1:w_max] 294 + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1], 295 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 296 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 297 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 298 299 if self.tau_exp[e_name] > 0: 300 _compute_drho(gapsize) 301 texp = self.tau_exp[e_name] 302 # Critical slowing down analysis 303 if w_max // 2 <= 1: 304 raise Exception("Need at least 8 samples for tau_exp error analysis") 305 for n in range(gapsize, w_max // 2, gapsize): 306 _compute_drho(n + gapsize) 307 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 308 # Bias correction hep-lat/0306017 eq. (49) included 309 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 310 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 311 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 312 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 313 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 314 self.e_windowsize[e_name] = n 315 break 316 else: 317 if self.S[e_name] == 0.0: 318 self.e_tauint[e_name] = 0.5 319 self.e_dtauint[e_name] = 0.0 320 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 322 self.e_windowsize[e_name] = 0 323 else: 324 # Standard automatic windowing procedure 325 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 326 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 327 for n in range(1, w_max // gapsize): 328 if g_w[n - 1] < 0 or n >= w_max // gapsize - 1: 329 _compute_drho(gapsize * n) 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return 352 353 gm = gamma_method 354 355 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 356 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 357 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 358 359 Parameters 360 ---------- 361 deltas : list 362 List of fluctuations 363 idx : list 364 List or range of configurations on which the deltas are defined. 365 shape : int 366 Number of configurations in idx. 367 w_max : int 368 Upper bound for the summation window. 369 fft : bool 370 determines whether the fft algorithm is used for the computation 371 of the autocorrelation function. 372 """ 373 gamma = np.zeros(w_max) 374 deltas = _expand_deltas(deltas, idx, shape) 375 new_shape = len(deltas) 376 if fft: 377 max_gamma = min(new_shape, w_max) 378 # The padding for the fft has to be even 379 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 380 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 381 else: 382 for n in range(w_max): 383 if new_shape - n >= 0: 384 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 385 386 return gamma 387 388 def details(self, ens_content=True): 389 """Output detailed properties of the Obs. 390 391 Parameters 392 ---------- 393 ens_content : bool 394 print details about the ensembles and replica if true. 395 """ 396 if self.tag is not None: 397 print("Description:", self.tag) 398 if not hasattr(self, 'e_dvalue'): 399 print('Result\t %3.8e' % (self.value)) 400 else: 401 if self.value == 0.0: 402 percentage = np.nan 403 else: 404 percentage = np.abs(self._dvalue / self.value) * 100 405 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 406 if len(self.e_names) > 1: 407 print(' Ensemble errors:') 408 e_content = self.e_content 409 for e_name in self.mc_names: 410 if isinstance(self.idl[e_content[e_name][0]], range): 411 gap = self.idl[e_content[e_name][0]].step 412 else: 413 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 414 415 if len(self.e_names) > 1: 416 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 417 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 418 tau_string += f" in units of {gap} config" 419 if gap > 1: 420 tau_string += "s" 421 if self.tau_exp[e_name] > 0: 422 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 423 else: 424 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 425 print(tau_string) 426 for e_name in self.cov_names: 427 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 428 if ens_content is True: 429 if len(self.e_names) == 1: 430 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 431 else: 432 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 433 my_string_list = [] 434 for key, value in sorted(self.e_content.items()): 435 if key not in self.covobs: 436 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 437 if len(value) == 1: 438 my_string += f': {self.shape[value[0]]} configurations' 439 if isinstance(self.idl[value[0]], range): 440 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 441 else: 442 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 443 else: 444 sublist = [] 445 for v in value: 446 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 447 my_substring += f': {self.shape[v]} configurations' 448 if isinstance(self.idl[v], range): 449 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 450 else: 451 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 452 sublist.append(my_substring) 453 454 my_string += '\n' + '\n'.join(sublist) 455 else: 456 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 457 my_string_list.append(my_string) 458 print('\n'.join(my_string_list)) 459 460 def reweight(self, weight): 461 """Reweight the obs with given rewighting factors. 462 463 Parameters 464 ---------- 465 weight : Obs 466 Reweighting factor. An Observable that has to be defined on a superset of the 467 configurations in obs[i].idl for all i. 468 all_configs : bool 469 if True, the reweighted observables are normalized by the average of 470 the reweighting factor on all configurations in weight.idl and not 471 on the configurations in obs[i].idl. Default False. 472 """ 473 return reweight(weight, [self])[0] 474 475 def is_zero_within_error(self, sigma=1): 476 """Checks whether the observable is zero within 'sigma' standard errors. 477 478 Parameters 479 ---------- 480 sigma : int 481 Number of standard errors used for the check. 482 483 Works only properly when the gamma method was run. 484 """ 485 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 486 487 def is_zero(self, atol=1e-10): 488 """Checks whether the observable is zero within a given tolerance. 489 490 Parameters 491 ---------- 492 atol : float 493 Absolute tolerance (for details see numpy documentation). 494 """ 495 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 496 497 def plot_tauint(self, save=None): 498 """Plot integrated autocorrelation time for each ensemble. 499 500 Parameters 501 ---------- 502 save : str 503 saves the figure to a file named 'save' if. 504 """ 505 if not hasattr(self, 'e_dvalue'): 506 raise Exception('Run the gamma method first.') 507 508 for e, e_name in enumerate(self.mc_names): 509 fig = plt.figure() 510 plt.xlabel(r'$W$') 511 plt.ylabel(r'$\tau_\mathrm{int}$') 512 length = int(len(self.e_n_tauint[e_name])) 513 if self.tau_exp[e_name] > 0: 514 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 515 x_help = np.arange(2 * self.tau_exp[e_name]) 516 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 517 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 518 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 519 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 520 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 521 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 522 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 523 else: 524 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 525 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 526 527 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 528 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 529 plt.legend() 530 plt.xlim(-0.5, xmax) 531 ylim = plt.ylim() 532 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 533 plt.draw() 534 if save: 535 fig.savefig(save + "_" + str(e)) 536 537 def plot_rho(self, save=None): 538 """Plot normalized autocorrelation function time for each ensemble. 539 540 Parameters 541 ---------- 542 save : str 543 saves the figure to a file named 'save' if. 544 """ 545 if not hasattr(self, 'e_dvalue'): 546 raise Exception('Run the gamma method first.') 547 for e, e_name in enumerate(self.mc_names): 548 fig = plt.figure() 549 plt.xlabel('W') 550 plt.ylabel('rho') 551 length = int(len(self.e_drho[e_name])) 552 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 553 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 554 if self.tau_exp[e_name] > 0: 555 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 556 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 557 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 558 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 559 else: 560 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 561 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 562 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 563 plt.xlim(-0.5, xmax) 564 plt.draw() 565 if save: 566 fig.savefig(save + "_" + str(e)) 567 568 def plot_rep_dist(self): 569 """Plot replica distribution for each ensemble with more than one replicum.""" 570 if not hasattr(self, 'e_dvalue'): 571 raise Exception('Run the gamma method first.') 572 for e, e_name in enumerate(self.mc_names): 573 if len(self.e_content[e_name]) == 1: 574 print('No replica distribution for a single replicum (', e_name, ')') 575 continue 576 r_length = [] 577 sub_r_mean = 0 578 for r, r_name in enumerate(self.e_content[e_name]): 579 r_length.append(len(self.deltas[r_name])) 580 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 581 e_N = np.sum(r_length) 582 sub_r_mean /= e_N 583 arr = np.zeros(len(self.e_content[e_name])) 584 for r, r_name in enumerate(self.e_content[e_name]): 585 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 586 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 587 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 588 plt.draw() 589 590 def plot_history(self, expand=True): 591 """Plot derived Monte Carlo history for each ensemble 592 593 Parameters 594 ---------- 595 expand : bool 596 show expanded history for irregular Monte Carlo chains (default: True). 597 """ 598 for e, e_name in enumerate(self.mc_names): 599 plt.figure() 600 r_length = [] 601 tmp = [] 602 tmp_expanded = [] 603 for r, r_name in enumerate(self.e_content[e_name]): 604 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 605 if expand: 606 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 607 r_length.append(len(tmp_expanded[-1])) 608 else: 609 r_length.append(len(tmp[-1])) 610 e_N = np.sum(r_length) 611 x = np.arange(e_N) 612 y_test = np.concatenate(tmp, axis=0) 613 if expand: 614 y = np.concatenate(tmp_expanded, axis=0) 615 else: 616 y = y_test 617 plt.errorbar(x, y, fmt='.', markersize=3) 618 plt.xlim(-0.5, e_N - 0.5) 619 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 620 plt.draw() 621 622 def plot_piechart(self, save=None): 623 """Plot piechart which shows the fractional contribution of each 624 ensemble to the error and returns a dictionary containing the fractions. 625 626 Parameters 627 ---------- 628 save : str 629 saves the figure to a file named 'save' if. 630 """ 631 if not hasattr(self, 'e_dvalue'): 632 raise Exception('Run the gamma method first.') 633 if np.isclose(0.0, self._dvalue, atol=1e-15): 634 raise Exception('Error is 0.0') 635 labels = self.e_names 636 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 637 fig1, ax1 = plt.subplots() 638 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 639 ax1.axis('equal') 640 plt.draw() 641 if save: 642 fig1.savefig(save) 643 644 return dict(zip(self.e_names, sizes)) 645 646 def dump(self, filename, datatype="json.gz", description="", **kwargs): 647 """Dump the Obs to a file 'name' of chosen format. 648 649 Parameters 650 ---------- 651 filename : str 652 name of the file to be saved. 653 datatype : str 654 Format of the exported file. Supported formats include 655 "json.gz" and "pickle" 656 description : str 657 Description for output file, only relevant for json.gz format. 658 path : str 659 specifies a custom path for the file (default '.') 660 """ 661 if 'path' in kwargs: 662 file_name = kwargs.get('path') + '/' + filename 663 else: 664 file_name = filename 665 666 if datatype == "json.gz": 667 from .input.json import dump_to_json 668 dump_to_json([self], file_name, description=description) 669 elif datatype == "pickle": 670 with open(file_name + '.p', 'wb') as fb: 671 pickle.dump(self, fb) 672 else: 673 raise Exception("Unknown datatype " + str(datatype)) 674 675 def export_jackknife(self): 676 """Export jackknife samples from the Obs 677 678 Returns 679 ------- 680 numpy.ndarray 681 Returns a numpy array of length N + 1 where N is the number of samples 682 for the given ensemble and replicum. The zeroth entry of the array contains 683 the mean value of the Obs, entries 1 to N contain the N jackknife samples 684 derived from the Obs. The current implementation only works for observables 685 defined on exactly one ensemble and replicum. The derived jackknife samples 686 should agree with samples from a full jackknife analysis up to O(1/N). 687 """ 688 689 if len(self.names) != 1: 690 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 691 692 name = self.names[0] 693 full_data = self.deltas[name] + self.r_values[name] 694 n = full_data.size 695 mean = self.value 696 tmp_jacks = np.zeros(n + 1) 697 tmp_jacks[0] = mean 698 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 699 return tmp_jacks 700 701 def __float__(self): 702 return float(self.value) 703 704 def __repr__(self): 705 return 'Obs[' + str(self) + ']' 706 707 def __str__(self): 708 return _format_uncertainty(self.value, self._dvalue) 709 710 def __hash__(self): 711 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 712 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 713 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 714 hash_tuple += tuple([o.encode() for o in self.names]) 715 m = hashlib.md5() 716 [m.update(o) for o in hash_tuple] 717 return int(m.hexdigest(), 16) & 0xFFFFFFFF 718 719 # Overload comparisons 720 def __lt__(self, other): 721 return self.value < other 722 723 def __le__(self, other): 724 return self.value <= other 725 726 def __gt__(self, other): 727 return self.value > other 728 729 def __ge__(self, other): 730 return self.value >= other 731 732 def __eq__(self, other): 733 return (self - other).is_zero() 734 735 def __ne__(self, other): 736 return not (self - other).is_zero() 737 738 # Overload math operations 739 def __add__(self, y): 740 if isinstance(y, Obs): 741 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 742 else: 743 if isinstance(y, np.ndarray): 744 return np.array([self + o for o in y]) 745 elif y.__class__.__name__ in ['Corr', 'CObs']: 746 return NotImplemented 747 else: 748 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 749 750 def __radd__(self, y): 751 return self + y 752 753 def __mul__(self, y): 754 if isinstance(y, Obs): 755 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 756 else: 757 if isinstance(y, np.ndarray): 758 return np.array([self * o for o in y]) 759 elif isinstance(y, complex): 760 return CObs(self * y.real, self * y.imag) 761 elif y.__class__.__name__ in ['Corr', 'CObs']: 762 return NotImplemented 763 else: 764 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 765 766 def __rmul__(self, y): 767 return self * y 768 769 def __sub__(self, y): 770 if isinstance(y, Obs): 771 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 772 else: 773 if isinstance(y, np.ndarray): 774 return np.array([self - o for o in y]) 775 elif y.__class__.__name__ in ['Corr', 'CObs']: 776 return NotImplemented 777 else: 778 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 779 780 def __rsub__(self, y): 781 return -1 * (self - y) 782 783 def __pos__(self): 784 return self 785 786 def __neg__(self): 787 return -1 * self 788 789 def __truediv__(self, y): 790 if isinstance(y, Obs): 791 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 792 else: 793 if isinstance(y, np.ndarray): 794 return np.array([self / o for o in y]) 795 elif y.__class__.__name__ in ['Corr', 'CObs']: 796 return NotImplemented 797 else: 798 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 799 800 def __rtruediv__(self, y): 801 if isinstance(y, Obs): 802 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 803 else: 804 if isinstance(y, np.ndarray): 805 return np.array([o / self for o in y]) 806 elif y.__class__.__name__ in ['Corr', 'CObs']: 807 return NotImplemented 808 else: 809 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 810 811 def __pow__(self, y): 812 if isinstance(y, Obs): 813 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 814 else: 815 return derived_observable(lambda x: x[0] ** y, [self]) 816 817 def __rpow__(self, y): 818 if isinstance(y, Obs): 819 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 820 else: 821 return derived_observable(lambda x: y ** x[0], [self]) 822 823 def __abs__(self): 824 return derived_observable(lambda x: anp.abs(x[0]), [self]) 825 826 # Overload numpy functions 827 def sqrt(self): 828 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 829 830 def log(self): 831 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 832 833 def exp(self): 834 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 835 836 def sin(self): 837 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 838 839 def cos(self): 840 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 841 842 def tan(self): 843 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 844 845 def arcsin(self): 846 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 847 848 def arccos(self): 849 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 850 851 def arctan(self): 852 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 853 854 def sinh(self): 855 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 856 857 def cosh(self): 858 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 859 860 def tanh(self): 861 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 862 863 def arcsinh(self): 864 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 865 866 def arccosh(self): 867 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 868 869 def arctanh(self): 870 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
62 def __init__(self, samples, names, idl=None, **kwargs): 63 """ Initialize Obs object. 64 65 Parameters 66 ---------- 67 samples : list 68 list of numpy arrays containing the Monte Carlo samples 69 names : list 70 list of strings labeling the individual samples 71 idl : list, optional 72 list of ranges or lists on which the samples are defined 73 """ 74 75 if kwargs.get("means") is None and len(samples): 76 if len(samples) != len(names): 77 raise ValueError('Length of samples and names incompatible.') 78 if idl is not None: 79 if len(idl) != len(names): 80 raise ValueError('Length of idl incompatible with samples and names.') 81 name_length = len(names) 82 if name_length > 1: 83 if name_length != len(set(names)): 84 raise ValueError('Names are not unique.') 85 if not all(isinstance(x, str) for x in names): 86 raise TypeError('All names have to be strings.') 87 else: 88 if not isinstance(names[0], str): 89 raise TypeError('All names have to be strings.') 90 if min(len(x) for x in samples) <= 4: 91 raise ValueError('Samples have to have at least 5 entries.') 92 93 self.names = sorted(names) 94 self.shape = {} 95 self.r_values = {} 96 self.deltas = {} 97 self._covobs = {} 98 99 self._value = 0 100 self.N = 0 101 self.idl = {} 102 if idl is not None: 103 for name, idx in sorted(zip(names, idl)): 104 if isinstance(idx, range): 105 self.idl[name] = idx 106 elif isinstance(idx, (list, np.ndarray)): 107 dc = np.unique(np.diff(idx)) 108 if np.any(dc < 0): 109 raise ValueError("Unsorted idx for idl[%s]" % (name)) 110 if len(dc) == 1: 111 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 112 else: 113 self.idl[name] = list(idx) 114 else: 115 raise TypeError('incompatible type for idl[%s].' % (name)) 116 else: 117 for name, sample in sorted(zip(names, samples)): 118 self.idl[name] = range(1, len(sample) + 1) 119 120 if kwargs.get("means") is not None: 121 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 122 self.shape[name] = len(self.idl[name]) 123 self.N += self.shape[name] 124 self.r_values[name] = mean 125 self.deltas[name] = sample 126 else: 127 for name, sample in sorted(zip(names, samples)): 128 self.shape[name] = len(self.idl[name]) 129 self.N += self.shape[name] 130 if len(sample) != self.shape[name]: 131 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 132 self.r_values[name] = np.mean(sample) 133 self.deltas[name] = sample - self.r_values[name] 134 self._value += self.shape[name] * self.r_values[name] 135 self._value /= self.N 136 137 self._dvalue = 0.0 138 self.ddvalue = 0.0 139 self.reweighted = False 140 141 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
176 def gamma_method(self, **kwargs): 177 """Estimate the error and related properties of the Obs. 178 179 Parameters 180 ---------- 181 S : float 182 specifies a custom value for the parameter S (default 2.0). 183 If set to 0 it is assumed that the data exhibits no 184 autocorrelation. In this case the error estimates coincides 185 with the sample standard error. 186 tau_exp : float 187 positive value triggers the critical slowing down analysis 188 (default 0.0). 189 N_sigma : float 190 number of standard deviations from zero until the tail is 191 attached to the autocorrelation function (default 1). 192 fft : bool 193 determines whether the fft algorithm is used for the computation 194 of the autocorrelation function (default True) 195 """ 196 197 e_content = self.e_content 198 self.e_dvalue = {} 199 self.e_ddvalue = {} 200 self.e_tauint = {} 201 self.e_dtauint = {} 202 self.e_windowsize = {} 203 self.e_n_tauint = {} 204 self.e_n_dtauint = {} 205 e_gamma = {} 206 self.e_rho = {} 207 self.e_drho = {} 208 self._dvalue = 0 209 self.ddvalue = 0 210 211 self.S = {} 212 self.tau_exp = {} 213 self.N_sigma = {} 214 215 if kwargs.get('fft') is False: 216 fft = False 217 else: 218 fft = True 219 220 def _parse_kwarg(kwarg_name): 221 if kwarg_name in kwargs: 222 tmp = kwargs.get(kwarg_name) 223 if isinstance(tmp, (int, float)): 224 if tmp < 0: 225 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 226 for e, e_name in enumerate(self.e_names): 227 getattr(self, kwarg_name)[e_name] = tmp 228 else: 229 raise TypeError(kwarg_name + ' is not in proper format.') 230 else: 231 for e, e_name in enumerate(self.e_names): 232 if e_name in getattr(Obs, kwarg_name + '_dict'): 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 234 else: 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 236 237 _parse_kwarg('S') 238 _parse_kwarg('tau_exp') 239 _parse_kwarg('N_sigma') 240 241 for e, e_name in enumerate(self.mc_names): 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name])) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 gaps = [] 273 for r_name in e_content[e_name]: 274 if isinstance(self.idl[r_name], range): 275 gaps.append(1) 276 else: 277 gaps.append(np.min(np.diff(self.idl[r_name]))) 278 279 if not np.all([gi == gaps[0] for gi in gaps]): 280 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 281 else: 282 gapsize = gaps[0] 283 284 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 285 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 286 # Make sure no entry of tauint is smaller than 0.5 287 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 288 # hep-lat/0306017 eq. (42) 289 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 290 self.e_n_dtauint[e_name][0] = 0.0 291 292 def _compute_drho(i): 293 tmp = (self.e_rho[e_name][i + 1:w_max] 294 + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1], 295 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 296 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 297 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 298 299 if self.tau_exp[e_name] > 0: 300 _compute_drho(gapsize) 301 texp = self.tau_exp[e_name] 302 # Critical slowing down analysis 303 if w_max // 2 <= 1: 304 raise Exception("Need at least 8 samples for tau_exp error analysis") 305 for n in range(gapsize, w_max // 2, gapsize): 306 _compute_drho(n + gapsize) 307 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 308 # Bias correction hep-lat/0306017 eq. (49) included 309 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 310 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 311 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 312 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 313 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 314 self.e_windowsize[e_name] = n 315 break 316 else: 317 if self.S[e_name] == 0.0: 318 self.e_tauint[e_name] = 0.5 319 self.e_dtauint[e_name] = 0.0 320 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 322 self.e_windowsize[e_name] = 0 323 else: 324 # Standard automatic windowing procedure 325 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 326 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 327 for n in range(1, w_max // gapsize): 328 if g_w[n - 1] < 0 or n >= w_max // gapsize - 1: 329 _compute_drho(gapsize * n) 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
176 def gamma_method(self, **kwargs): 177 """Estimate the error and related properties of the Obs. 178 179 Parameters 180 ---------- 181 S : float 182 specifies a custom value for the parameter S (default 2.0). 183 If set to 0 it is assumed that the data exhibits no 184 autocorrelation. In this case the error estimates coincides 185 with the sample standard error. 186 tau_exp : float 187 positive value triggers the critical slowing down analysis 188 (default 0.0). 189 N_sigma : float 190 number of standard deviations from zero until the tail is 191 attached to the autocorrelation function (default 1). 192 fft : bool 193 determines whether the fft algorithm is used for the computation 194 of the autocorrelation function (default True) 195 """ 196 197 e_content = self.e_content 198 self.e_dvalue = {} 199 self.e_ddvalue = {} 200 self.e_tauint = {} 201 self.e_dtauint = {} 202 self.e_windowsize = {} 203 self.e_n_tauint = {} 204 self.e_n_dtauint = {} 205 e_gamma = {} 206 self.e_rho = {} 207 self.e_drho = {} 208 self._dvalue = 0 209 self.ddvalue = 0 210 211 self.S = {} 212 self.tau_exp = {} 213 self.N_sigma = {} 214 215 if kwargs.get('fft') is False: 216 fft = False 217 else: 218 fft = True 219 220 def _parse_kwarg(kwarg_name): 221 if kwarg_name in kwargs: 222 tmp = kwargs.get(kwarg_name) 223 if isinstance(tmp, (int, float)): 224 if tmp < 0: 225 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 226 for e, e_name in enumerate(self.e_names): 227 getattr(self, kwarg_name)[e_name] = tmp 228 else: 229 raise TypeError(kwarg_name + ' is not in proper format.') 230 else: 231 for e, e_name in enumerate(self.e_names): 232 if e_name in getattr(Obs, kwarg_name + '_dict'): 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 234 else: 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 236 237 _parse_kwarg('S') 238 _parse_kwarg('tau_exp') 239 _parse_kwarg('N_sigma') 240 241 for e, e_name in enumerate(self.mc_names): 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name])) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 gaps = [] 273 for r_name in e_content[e_name]: 274 if isinstance(self.idl[r_name], range): 275 gaps.append(1) 276 else: 277 gaps.append(np.min(np.diff(self.idl[r_name]))) 278 279 if not np.all([gi == gaps[0] for gi in gaps]): 280 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 281 else: 282 gapsize = gaps[0] 283 284 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 285 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 286 # Make sure no entry of tauint is smaller than 0.5 287 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 288 # hep-lat/0306017 eq. (42) 289 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 290 self.e_n_dtauint[e_name][0] = 0.0 291 292 def _compute_drho(i): 293 tmp = (self.e_rho[e_name][i + 1:w_max] 294 + np.concatenate([self.e_rho[e_name][i - 1:None if i - w_max // 2 < 0 else 2 * (i - w_max // 2):-1], 295 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 296 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 297 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 298 299 if self.tau_exp[e_name] > 0: 300 _compute_drho(gapsize) 301 texp = self.tau_exp[e_name] 302 # Critical slowing down analysis 303 if w_max // 2 <= 1: 304 raise Exception("Need at least 8 samples for tau_exp error analysis") 305 for n in range(gapsize, w_max // 2, gapsize): 306 _compute_drho(n + gapsize) 307 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 308 # Bias correction hep-lat/0306017 eq. (49) included 309 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 310 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 311 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 312 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 313 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 314 self.e_windowsize[e_name] = n 315 break 316 else: 317 if self.S[e_name] == 0.0: 318 self.e_tauint[e_name] = 0.5 319 self.e_dtauint[e_name] = 0.0 320 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 322 self.e_windowsize[e_name] = 0 323 else: 324 # Standard automatic windowing procedure 325 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 326 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 327 for n in range(1, w_max // gapsize): 328 if g_w[n - 1] < 0 or n >= w_max // gapsize - 1: 329 _compute_drho(gapsize * n) 330 n *= gapsize 331 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 332 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 333 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 334 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 335 self.e_windowsize[e_name] = n 336 break 337 338 self._dvalue += self.e_dvalue[e_name] ** 2 339 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 340 341 for e_name in self.cov_names: 342 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 343 self.e_ddvalue[e_name] = 0 344 self._dvalue += self.e_dvalue[e_name]**2 345 346 self._dvalue = np.sqrt(self._dvalue) 347 if self._dvalue == 0.0: 348 self.ddvalue = 0.0 349 else: 350 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 351 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
388 def details(self, ens_content=True): 389 """Output detailed properties of the Obs. 390 391 Parameters 392 ---------- 393 ens_content : bool 394 print details about the ensembles and replica if true. 395 """ 396 if self.tag is not None: 397 print("Description:", self.tag) 398 if not hasattr(self, 'e_dvalue'): 399 print('Result\t %3.8e' % (self.value)) 400 else: 401 if self.value == 0.0: 402 percentage = np.nan 403 else: 404 percentage = np.abs(self._dvalue / self.value) * 100 405 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 406 if len(self.e_names) > 1: 407 print(' Ensemble errors:') 408 e_content = self.e_content 409 for e_name in self.mc_names: 410 if isinstance(self.idl[e_content[e_name][0]], range): 411 gap = self.idl[e_content[e_name][0]].step 412 else: 413 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 414 415 if len(self.e_names) > 1: 416 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 417 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 418 tau_string += f" in units of {gap} config" 419 if gap > 1: 420 tau_string += "s" 421 if self.tau_exp[e_name] > 0: 422 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 423 else: 424 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 425 print(tau_string) 426 for e_name in self.cov_names: 427 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 428 if ens_content is True: 429 if len(self.e_names) == 1: 430 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 431 else: 432 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 433 my_string_list = [] 434 for key, value in sorted(self.e_content.items()): 435 if key not in self.covobs: 436 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 437 if len(value) == 1: 438 my_string += f': {self.shape[value[0]]} configurations' 439 if isinstance(self.idl[value[0]], range): 440 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 441 else: 442 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 443 else: 444 sublist = [] 445 for v in value: 446 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 447 my_substring += f': {self.shape[v]} configurations' 448 if isinstance(self.idl[v], range): 449 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 450 else: 451 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 452 sublist.append(my_substring) 453 454 my_string += '\n' + '\n'.join(sublist) 455 else: 456 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 457 my_string_list.append(my_string) 458 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
460 def reweight(self, weight): 461 """Reweight the obs with given rewighting factors. 462 463 Parameters 464 ---------- 465 weight : Obs 466 Reweighting factor. An Observable that has to be defined on a superset of the 467 configurations in obs[i].idl for all i. 468 all_configs : bool 469 if True, the reweighted observables are normalized by the average of 470 the reweighting factor on all configurations in weight.idl and not 471 on the configurations in obs[i].idl. Default False. 472 """ 473 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
475 def is_zero_within_error(self, sigma=1): 476 """Checks whether the observable is zero within 'sigma' standard errors. 477 478 Parameters 479 ---------- 480 sigma : int 481 Number of standard errors used for the check. 482 483 Works only properly when the gamma method was run. 484 """ 485 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
487 def is_zero(self, atol=1e-10): 488 """Checks whether the observable is zero within a given tolerance. 489 490 Parameters 491 ---------- 492 atol : float 493 Absolute tolerance (for details see numpy documentation). 494 """ 495 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
497 def plot_tauint(self, save=None): 498 """Plot integrated autocorrelation time for each ensemble. 499 500 Parameters 501 ---------- 502 save : str 503 saves the figure to a file named 'save' if. 504 """ 505 if not hasattr(self, 'e_dvalue'): 506 raise Exception('Run the gamma method first.') 507 508 for e, e_name in enumerate(self.mc_names): 509 fig = plt.figure() 510 plt.xlabel(r'$W$') 511 plt.ylabel(r'$\tau_\mathrm{int}$') 512 length = int(len(self.e_n_tauint[e_name])) 513 if self.tau_exp[e_name] > 0: 514 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 515 x_help = np.arange(2 * self.tau_exp[e_name]) 516 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 517 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 518 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 519 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 520 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 521 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 522 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 523 else: 524 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 525 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 526 527 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 528 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 529 plt.legend() 530 plt.xlim(-0.5, xmax) 531 ylim = plt.ylim() 532 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 533 plt.draw() 534 if save: 535 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
537 def plot_rho(self, save=None): 538 """Plot normalized autocorrelation function time for each ensemble. 539 540 Parameters 541 ---------- 542 save : str 543 saves the figure to a file named 'save' if. 544 """ 545 if not hasattr(self, 'e_dvalue'): 546 raise Exception('Run the gamma method first.') 547 for e, e_name in enumerate(self.mc_names): 548 fig = plt.figure() 549 plt.xlabel('W') 550 plt.ylabel('rho') 551 length = int(len(self.e_drho[e_name])) 552 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 553 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 554 if self.tau_exp[e_name] > 0: 555 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 556 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 557 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 558 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 559 else: 560 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 561 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 562 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 563 plt.xlim(-0.5, xmax) 564 plt.draw() 565 if save: 566 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
568 def plot_rep_dist(self): 569 """Plot replica distribution for each ensemble with more than one replicum.""" 570 if not hasattr(self, 'e_dvalue'): 571 raise Exception('Run the gamma method first.') 572 for e, e_name in enumerate(self.mc_names): 573 if len(self.e_content[e_name]) == 1: 574 print('No replica distribution for a single replicum (', e_name, ')') 575 continue 576 r_length = [] 577 sub_r_mean = 0 578 for r, r_name in enumerate(self.e_content[e_name]): 579 r_length.append(len(self.deltas[r_name])) 580 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 581 e_N = np.sum(r_length) 582 sub_r_mean /= e_N 583 arr = np.zeros(len(self.e_content[e_name])) 584 for r, r_name in enumerate(self.e_content[e_name]): 585 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 586 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 587 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 588 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
590 def plot_history(self, expand=True): 591 """Plot derived Monte Carlo history for each ensemble 592 593 Parameters 594 ---------- 595 expand : bool 596 show expanded history for irregular Monte Carlo chains (default: True). 597 """ 598 for e, e_name in enumerate(self.mc_names): 599 plt.figure() 600 r_length = [] 601 tmp = [] 602 tmp_expanded = [] 603 for r, r_name in enumerate(self.e_content[e_name]): 604 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 605 if expand: 606 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 607 r_length.append(len(tmp_expanded[-1])) 608 else: 609 r_length.append(len(tmp[-1])) 610 e_N = np.sum(r_length) 611 x = np.arange(e_N) 612 y_test = np.concatenate(tmp, axis=0) 613 if expand: 614 y = np.concatenate(tmp_expanded, axis=0) 615 else: 616 y = y_test 617 plt.errorbar(x, y, fmt='.', markersize=3) 618 plt.xlim(-0.5, e_N - 0.5) 619 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 620 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
622 def plot_piechart(self, save=None): 623 """Plot piechart which shows the fractional contribution of each 624 ensemble to the error and returns a dictionary containing the fractions. 625 626 Parameters 627 ---------- 628 save : str 629 saves the figure to a file named 'save' if. 630 """ 631 if not hasattr(self, 'e_dvalue'): 632 raise Exception('Run the gamma method first.') 633 if np.isclose(0.0, self._dvalue, atol=1e-15): 634 raise Exception('Error is 0.0') 635 labels = self.e_names 636 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 637 fig1, ax1 = plt.subplots() 638 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 639 ax1.axis('equal') 640 plt.draw() 641 if save: 642 fig1.savefig(save) 643 644 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
646 def dump(self, filename, datatype="json.gz", description="", **kwargs): 647 """Dump the Obs to a file 'name' of chosen format. 648 649 Parameters 650 ---------- 651 filename : str 652 name of the file to be saved. 653 datatype : str 654 Format of the exported file. Supported formats include 655 "json.gz" and "pickle" 656 description : str 657 Description for output file, only relevant for json.gz format. 658 path : str 659 specifies a custom path for the file (default '.') 660 """ 661 if 'path' in kwargs: 662 file_name = kwargs.get('path') + '/' + filename 663 else: 664 file_name = filename 665 666 if datatype == "json.gz": 667 from .input.json import dump_to_json 668 dump_to_json([self], file_name, description=description) 669 elif datatype == "pickle": 670 with open(file_name + '.p', 'wb') as fb: 671 pickle.dump(self, fb) 672 else: 673 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
675 def export_jackknife(self): 676 """Export jackknife samples from the Obs 677 678 Returns 679 ------- 680 numpy.ndarray 681 Returns a numpy array of length N + 1 where N is the number of samples 682 for the given ensemble and replicum. The zeroth entry of the array contains 683 the mean value of the Obs, entries 1 to N contain the N jackknife samples 684 derived from the Obs. The current implementation only works for observables 685 defined on exactly one ensemble and replicum. The derived jackknife samples 686 should agree with samples from a full jackknife analysis up to O(1/N). 687 """ 688 689 if len(self.names) != 1: 690 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 691 692 name = self.names[0] 693 full_data = self.deltas[name] + self.r_values[name] 694 n = full_data.size 695 mean = self.value 696 tmp_jacks = np.zeros(n + 1) 697 tmp_jacks[0] = mean 698 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 699 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
873class CObs: 874 """Class for a complex valued observable.""" 875 __slots__ = ['_real', '_imag', 'tag'] 876 877 def __init__(self, real, imag=0.0): 878 self._real = real 879 self._imag = imag 880 self.tag = None 881 882 @property 883 def real(self): 884 return self._real 885 886 @property 887 def imag(self): 888 return self._imag 889 890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs) 896 897 def is_zero(self): 898 """Checks whether both real and imaginary part are zero within machine precision.""" 899 return self.real == 0.0 and self.imag == 0.0 900 901 def conjugate(self): 902 return CObs(self.real, -self.imag) 903 904 def __add__(self, other): 905 if isinstance(other, np.ndarray): 906 return other + self 907 elif hasattr(other, 'real') and hasattr(other, 'imag'): 908 return CObs(self.real + other.real, 909 self.imag + other.imag) 910 else: 911 return CObs(self.real + other, self.imag) 912 913 def __radd__(self, y): 914 return self + y 915 916 def __sub__(self, other): 917 if isinstance(other, np.ndarray): 918 return -1 * (other - self) 919 elif hasattr(other, 'real') and hasattr(other, 'imag'): 920 return CObs(self.real - other.real, self.imag - other.imag) 921 else: 922 return CObs(self.real - other, self.imag) 923 924 def __rsub__(self, other): 925 return -1 * (self - other) 926 927 def __mul__(self, other): 928 if isinstance(other, np.ndarray): 929 return other * self 930 elif hasattr(other, 'real') and hasattr(other, 'imag'): 931 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 932 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 933 [self.real, other.real, self.imag, other.imag], 934 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 935 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 936 [self.real, other.real, self.imag, other.imag], 937 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 938 elif getattr(other, 'imag', 0) != 0: 939 return CObs(self.real * other.real - self.imag * other.imag, 940 self.imag * other.real + self.real * other.imag) 941 else: 942 return CObs(self.real * other.real, self.imag * other.real) 943 else: 944 return CObs(self.real * other, self.imag * other) 945 946 def __rmul__(self, other): 947 return self * other 948 949 def __truediv__(self, other): 950 if isinstance(other, np.ndarray): 951 return 1 / (other / self) 952 elif hasattr(other, 'real') and hasattr(other, 'imag'): 953 r = other.real ** 2 + other.imag ** 2 954 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 955 else: 956 return CObs(self.real / other, self.imag / other) 957 958 def __rtruediv__(self, other): 959 r = self.real ** 2 + self.imag ** 2 960 if hasattr(other, 'real') and hasattr(other, 'imag'): 961 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 962 else: 963 return CObs(self.real * other / r, -self.imag * other / r) 964 965 def __abs__(self): 966 return np.sqrt(self.real**2 + self.imag**2) 967 968 def __pos__(self): 969 return self 970 971 def __neg__(self): 972 return -1 * self 973 974 def __eq__(self, other): 975 return self.real == other.real and self.imag == other.imag 976 977 def __str__(self): 978 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 979 980 def __repr__(self): 981 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1106def derived_observable(func, data, array_mode=False, **kwargs): 1107 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1108 1109 Parameters 1110 ---------- 1111 func : object 1112 arbitrary function of the form func(data, **kwargs). For the 1113 automatic differentiation to work, all numpy functions have to have 1114 the autograd wrapper (use 'import autograd.numpy as anp'). 1115 data : list 1116 list of Obs, e.g. [obs1, obs2, obs3]. 1117 num_grad : bool 1118 if True, numerical derivatives are used instead of autograd 1119 (default False). To control the numerical differentiation the 1120 kwargs of numdifftools.step_generators.MaxStepGenerator 1121 can be used. 1122 man_grad : list 1123 manually supply a list or an array which contains the jacobian 1124 of func. Use cautiously, supplying the wrong derivative will 1125 not be intercepted. 1126 1127 Notes 1128 ----- 1129 For simple mathematical operations it can be practical to use anonymous 1130 functions. For the ratio of two observables one can e.g. use 1131 1132 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1133 """ 1134 1135 data = np.asarray(data) 1136 raveled_data = data.ravel() 1137 1138 # Workaround for matrix operations containing non Obs data 1139 if not all(isinstance(x, Obs) for x in raveled_data): 1140 for i in range(len(raveled_data)): 1141 if isinstance(raveled_data[i], (int, float)): 1142 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1143 1144 allcov = {} 1145 for o in raveled_data: 1146 for name in o.cov_names: 1147 if name in allcov: 1148 if not np.allclose(allcov[name], o.covobs[name].cov): 1149 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1150 else: 1151 allcov[name] = o.covobs[name].cov 1152 1153 n_obs = len(raveled_data) 1154 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1155 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1156 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1157 1158 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1159 1160 if data.ndim == 1: 1161 values = np.array([o.value for o in data]) 1162 else: 1163 values = np.vectorize(lambda x: x.value)(data) 1164 1165 new_values = func(values, **kwargs) 1166 1167 multi = int(isinstance(new_values, np.ndarray)) 1168 1169 new_r_values = {} 1170 new_idl_d = {} 1171 for name in new_sample_names: 1172 idl = [] 1173 tmp_values = np.zeros(n_obs) 1174 for i, item in enumerate(raveled_data): 1175 tmp_values[i] = item.r_values.get(name, item.value) 1176 tmp_idl = item.idl.get(name) 1177 if tmp_idl is not None: 1178 idl.append(tmp_idl) 1179 if multi > 0: 1180 tmp_values = np.array(tmp_values).reshape(data.shape) 1181 new_r_values[name] = func(tmp_values, **kwargs) 1182 new_idl_d[name] = _merge_idx(idl) 1183 1184 if 'man_grad' in kwargs: 1185 deriv = np.asarray(kwargs.get('man_grad')) 1186 if new_values.shape + data.shape != deriv.shape: 1187 raise Exception('Manual derivative does not have correct shape.') 1188 elif kwargs.get('num_grad') is True: 1189 if multi > 0: 1190 raise Exception('Multi mode currently not supported for numerical derivative') 1191 options = { 1192 'base_step': 0.1, 1193 'step_ratio': 2.5} 1194 for key in options.keys(): 1195 kwarg = kwargs.get(key) 1196 if kwarg is not None: 1197 options[key] = kwarg 1198 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1199 if tmp_df.size == 1: 1200 deriv = np.array([tmp_df.real]) 1201 else: 1202 deriv = tmp_df.real 1203 else: 1204 deriv = jacobian(func)(values, **kwargs) 1205 1206 final_result = np.zeros(new_values.shape, dtype=object) 1207 1208 if array_mode is True: 1209 1210 class _Zero_grad(): 1211 def __init__(self, N): 1212 self.grad = np.zeros((N, 1)) 1213 1214 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1215 d_extracted = {} 1216 g_extracted = {} 1217 for name in new_sample_names: 1218 d_extracted[name] = [] 1219 ens_length = len(new_idl_d[name]) 1220 for i_dat, dat in enumerate(data): 1221 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1222 for name in new_cov_names: 1223 g_extracted[name] = [] 1224 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1225 for i_dat, dat in enumerate(data): 1226 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1227 1228 for i_val, new_val in np.ndenumerate(new_values): 1229 new_deltas = {} 1230 new_grad = {} 1231 if array_mode is True: 1232 for name in new_sample_names: 1233 ens_length = d_extracted[name][0].shape[-1] 1234 new_deltas[name] = np.zeros(ens_length) 1235 for i_dat, dat in enumerate(d_extracted[name]): 1236 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1237 for name in new_cov_names: 1238 new_grad[name] = 0 1239 for i_dat, dat in enumerate(g_extracted[name]): 1240 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1241 else: 1242 for j_obs, obs in np.ndenumerate(data): 1243 for name in obs.names: 1244 if name in obs.cov_names: 1245 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1246 else: 1247 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1248 1249 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1250 1251 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1252 raise Exception('The same name has been used for deltas and covobs!') 1253 new_samples = [] 1254 new_means = [] 1255 new_idl = [] 1256 new_names_obs = [] 1257 for name in new_names: 1258 if name not in new_covobs: 1259 new_samples.append(new_deltas[name]) 1260 new_idl.append(new_idl_d[name]) 1261 new_means.append(new_r_values[name][i_val]) 1262 new_names_obs.append(name) 1263 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1264 for name in new_covobs: 1265 final_result[i_val].names.append(name) 1266 final_result[i_val]._covobs = new_covobs 1267 final_result[i_val]._value = new_val 1268 final_result[i_val].reweighted = reweighted 1269 1270 if multi == 0: 1271 final_result = final_result.item() 1272 1273 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1310def reweight(weight, obs, **kwargs): 1311 """Reweight a list of observables. 1312 1313 Parameters 1314 ---------- 1315 weight : Obs 1316 Reweighting factor. An Observable that has to be defined on a superset of the 1317 configurations in obs[i].idl for all i. 1318 obs : list 1319 list of Obs, e.g. [obs1, obs2, obs3]. 1320 all_configs : bool 1321 if True, the reweighted observables are normalized by the average of 1322 the reweighting factor on all configurations in weight.idl and not 1323 on the configurations in obs[i].idl. Default False. 1324 """ 1325 result = [] 1326 for i in range(len(obs)): 1327 if len(obs[i].cov_names): 1328 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1329 if not set(obs[i].names).issubset(weight.names): 1330 raise Exception('Error: Ensembles do not fit') 1331 for name in obs[i].names: 1332 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1333 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1334 new_samples = [] 1335 w_deltas = {} 1336 for name in sorted(obs[i].names): 1337 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1338 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1339 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1340 1341 if kwargs.get('all_configs'): 1342 new_weight = weight 1343 else: 1344 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1345 1346 result.append(tmp_obs / new_weight) 1347 result[-1].reweighted = True 1348 1349 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1352def correlate(obs_a, obs_b): 1353 """Correlate two observables. 1354 1355 Parameters 1356 ---------- 1357 obs_a : Obs 1358 First observable 1359 obs_b : Obs 1360 Second observable 1361 1362 Notes 1363 ----- 1364 Keep in mind to only correlate primary observables which have not been reweighted 1365 yet. The reweighting has to be applied after correlating the observables. 1366 Currently only works if ensembles are identical (this is not strictly necessary). 1367 """ 1368 1369 if sorted(obs_a.names) != sorted(obs_b.names): 1370 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1371 if len(obs_a.cov_names) or len(obs_b.cov_names): 1372 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1373 for name in obs_a.names: 1374 if obs_a.shape[name] != obs_b.shape[name]: 1375 raise Exception('Shapes of ensemble', name, 'do not fit') 1376 if obs_a.idl[name] != obs_b.idl[name]: 1377 raise Exception('idl of ensemble', name, 'do not fit') 1378 1379 if obs_a.reweighted is True: 1380 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1381 if obs_b.reweighted is True: 1382 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1383 1384 new_samples = [] 1385 new_idl = [] 1386 for name in sorted(obs_a.names): 1387 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1388 new_idl.append(obs_a.idl[name]) 1389 1390 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1391 o.reweighted = obs_a.reweighted or obs_b.reweighted 1392 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1395def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1396 r'''Calculates the error covariance matrix of a set of observables. 1397 1398 WARNING: This function should be used with care, especially for observables with support on multiple 1399 ensembles with differing autocorrelations. See the notes below for details. 1400 1401 The gamma method has to be applied first to all observables. 1402 1403 Parameters 1404 ---------- 1405 obs : list or numpy.ndarray 1406 List or one dimensional array of Obs 1407 visualize : bool 1408 If True plots the corresponding normalized correlation matrix (default False). 1409 correlation : bool 1410 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1411 smooth : None or int 1412 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1413 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1414 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1415 small ones. 1416 1417 Notes 1418 ----- 1419 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1420 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1421 in the absence of autocorrelation. 1422 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1423 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1424 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1425 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1426 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1427 ''' 1428 1429 length = len(obs) 1430 1431 max_samples = np.max([o.N for o in obs]) 1432 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1433 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1434 1435 cov = np.zeros((length, length)) 1436 for i in range(length): 1437 for j in range(i, length): 1438 cov[i, j] = _covariance_element(obs[i], obs[j]) 1439 cov = cov + cov.T - np.diag(np.diag(cov)) 1440 1441 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1442 1443 if isinstance(smooth, int): 1444 corr = _smooth_eigenvalues(corr, smooth) 1445 1446 if visualize: 1447 plt.matshow(corr, vmin=-1, vmax=1) 1448 plt.set_cmap('RdBu') 1449 plt.colorbar() 1450 plt.draw() 1451 1452 if correlation is True: 1453 return corr 1454 1455 errors = [o.dvalue for o in obs] 1456 cov = np.diag(errors) @ corr @ np.diag(errors) 1457 1458 eigenvalues = np.linalg.eigh(cov)[0] 1459 if not np.all(eigenvalues >= 0): 1460 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1461 1462 return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1542def import_jackknife(jacks, name, idl=None): 1543 """Imports jackknife samples and returns an Obs 1544 1545 Parameters 1546 ---------- 1547 jacks : numpy.ndarray 1548 numpy array containing the mean value as zeroth entry and 1549 the N jackknife samples as first to Nth entry. 1550 name : str 1551 name of the ensemble the samples are defined on. 1552 """ 1553 length = len(jacks) - 1 1554 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1555 samples = jacks[1:] @ prj 1556 mean = np.mean(samples) 1557 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1558 new_obs._value = jacks[0] 1559 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1562def merge_obs(list_of_obs): 1563 """Combine all observables in list_of_obs into one new observable 1564 1565 Parameters 1566 ---------- 1567 list_of_obs : list 1568 list of the Obs object to be combined 1569 1570 Notes 1571 ----- 1572 It is not possible to combine obs which are based on the same replicum 1573 """ 1574 replist = [item for obs in list_of_obs for item in obs.names] 1575 if (len(replist) == len(set(replist))) is False: 1576 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1577 if any([len(o.cov_names) for o in list_of_obs]): 1578 raise Exception('Not possible to merge data that contains covobs!') 1579 new_dict = {} 1580 idl_dict = {} 1581 for o in list_of_obs: 1582 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1583 for key in set(o.deltas) | set(o.r_values)}) 1584 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1585 1586 names = sorted(new_dict.keys()) 1587 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1588 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1589 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1592def cov_Obs(means, cov, name, grad=None): 1593 """Create an Obs based on mean(s) and a covariance matrix 1594 1595 Parameters 1596 ---------- 1597 mean : list of floats or float 1598 N mean value(s) of the new Obs 1599 cov : list or array 1600 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1601 name : str 1602 identifier for the covariance matrix 1603 grad : list or array 1604 Gradient of the Covobs wrt. the means belonging to cov. 1605 """ 1606 1607 def covobs_to_obs(co): 1608 """Make an Obs out of a Covobs 1609 1610 Parameters 1611 ---------- 1612 co : Covobs 1613 Covobs to be embedded into the Obs 1614 """ 1615 o = Obs([], [], means=[]) 1616 o._value = co.value 1617 o.names.append(co.name) 1618 o._covobs[co.name] = co 1619 o._dvalue = np.sqrt(co.errsq()) 1620 return o 1621 1622 ol = [] 1623 if isinstance(means, (float, int)): 1624 means = [means] 1625 1626 for i in range(len(means)): 1627 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1628 if ol[0].covobs[name].N != len(means): 1629 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1630 if len(ol) == 1: 1631 return ol[0] 1632 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.