pyerrors.obs
1import warnings 2import hashlib 3import pickle 4from math import gcd 5from functools import reduce 6import numpy as np 7import autograd.numpy as anp # Thinly-wrapped numpy 8from autograd import jacobian 9import matplotlib.pyplot as plt 10from scipy.stats import skew, skewtest, kurtosis, kurtosistest 11import numdifftools as nd 12from itertools import groupby 13from .covobs import Covobs 14 15 16class Obs: 17 """Class for a general observable. 18 19 Instances of Obs are the basic objects of a pyerrors error analysis. 20 They are initialized with a list which contains arrays of samples for 21 different ensembles/replica and another list of same length which contains 22 the names of the ensembles/replica. Mathematical operations can be 23 performed on instances. The result is another instance of Obs. The error of 24 an instance can be computed with the gamma_method. Also contains additional 25 methods for output and visualization of the error calculation. 26 27 Attributes 28 ---------- 29 S_global : float 30 Standard value for S (default 2.0) 31 S_dict : dict 32 Dictionary for S values. If an entry for a given ensemble 33 exists this overwrites the standard value for that ensemble. 34 tau_exp_global : float 35 Standard value for tau_exp (default 0.0) 36 tau_exp_dict : dict 37 Dictionary for tau_exp values. If an entry for a given ensemble exists 38 this overwrites the standard value for that ensemble. 39 N_sigma_global : float 40 Standard value for N_sigma (default 1.0) 41 N_sigma_dict : dict 42 Dictionary for N_sigma values. If an entry for a given ensemble exists 43 this overwrites the standard value for that ensemble. 44 """ 45 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 46 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 47 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 48 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 49 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 50 51 S_global = 2.0 52 S_dict = {} 53 tau_exp_global = 0.0 54 tau_exp_dict = {} 55 N_sigma_global = 1.0 56 N_sigma_dict = {} 57 filter_eps = 1e-10 58 59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None 140 141 @property 142 def value(self): 143 return self._value 144 145 @property 146 def dvalue(self): 147 return self._dvalue 148 149 @property 150 def e_names(self): 151 return sorted(set([o.split('|')[0] for o in self.names])) 152 153 @property 154 def cov_names(self): 155 return sorted(set([o for o in self.covobs.keys()])) 156 157 @property 158 def mc_names(self): 159 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 160 161 @property 162 def e_content(self): 163 res = {} 164 for e, e_name in enumerate(self.e_names): 165 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 166 if e_name in self.names: 167 res[e_name].append(e_name) 168 return res 169 170 @property 171 def covobs(self): 172 return self._covobs 173 174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 gaps = [] 271 for r_name in e_content[e_name]: 272 if isinstance(self.idl[r_name], range): 273 gaps.append(1) 274 else: 275 gaps.append(np.min(np.diff(self.idl[r_name]))) 276 277 if not np.all([gi == gaps[0] for gi in gaps]): 278 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 279 else: 280 gapsize = gaps[0] 281 282 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 283 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 284 # Make sure no entry of tauint is smaller than 0.5 285 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 286 # hep-lat/0306017 eq. (42) 287 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 288 self.e_n_dtauint[e_name][0] = 0.0 289 290 def _compute_drho(i): 291 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 292 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 293 294 _compute_drho(gapsize) 295 if self.tau_exp[e_name] > 0: 296 texp = self.tau_exp[e_name] 297 # Critical slowing down analysis 298 if w_max // 2 <= 1: 299 raise Exception("Need at least 8 samples for tau_exp error analysis") 300 for n in range(gapsize, w_max // 2, gapsize): 301 _compute_drho(n + gapsize) 302 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 303 # Bias correction hep-lat/0306017 eq. (49) included 304 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 305 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 306 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 307 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 308 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 309 self.e_windowsize[e_name] = n 310 break 311 else: 312 if self.S[e_name] == 0.0: 313 self.e_tauint[e_name] = 0.5 314 self.e_dtauint[e_name] = 0.0 315 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 316 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 317 self.e_windowsize[e_name] = 0 318 else: 319 # Standard automatic windowing procedure 320 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 321 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 322 for n in range(1, w_max): 323 if n < w_max // 2 - 2: 324 _compute_drho(gapsize * n + gapsize) 325 if g_w[n - 1] < 0 or n >= w_max - 1: 326 n *= gapsize 327 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 328 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 329 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 330 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 331 self.e_windowsize[e_name] = n 332 break 333 334 self._dvalue += self.e_dvalue[e_name] ** 2 335 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 336 337 for e_name in self.cov_names: 338 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 339 self.e_ddvalue[e_name] = 0 340 self._dvalue += self.e_dvalue[e_name]**2 341 342 self._dvalue = np.sqrt(self._dvalue) 343 if self._dvalue == 0.0: 344 self.ddvalue = 0.0 345 else: 346 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 347 return 348 349 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 350 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 351 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 352 353 Parameters 354 ---------- 355 deltas : list 356 List of fluctuations 357 idx : list 358 List or range of configurations on which the deltas are defined. 359 shape : int 360 Number of configurations in idx. 361 w_max : int 362 Upper bound for the summation window. 363 fft : bool 364 determines whether the fft algorithm is used for the computation 365 of the autocorrelation function. 366 """ 367 gamma = np.zeros(w_max) 368 deltas = _expand_deltas(deltas, idx, shape) 369 new_shape = len(deltas) 370 if fft: 371 max_gamma = min(new_shape, w_max) 372 # The padding for the fft has to be even 373 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 374 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 375 else: 376 for n in range(w_max): 377 if new_shape - n >= 0: 378 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 379 380 return gamma 381 382 def details(self, ens_content=True): 383 """Output detailed properties of the Obs. 384 385 Parameters 386 ---------- 387 ens_content : bool 388 print details about the ensembles and replica if true. 389 """ 390 if self.tag is not None: 391 print("Description:", self.tag) 392 if not hasattr(self, 'e_dvalue'): 393 print('Result\t %3.8e' % (self.value)) 394 else: 395 if self.value == 0.0: 396 percentage = np.nan 397 else: 398 percentage = np.abs(self._dvalue / self.value) * 100 399 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 400 if len(self.e_names) > 1: 401 print(' Ensemble errors:') 402 e_content = self.e_content 403 for e_name in self.mc_names: 404 if isinstance(self.idl[e_content[e_name][0]], range): 405 gap = self.idl[e_content[e_name][0]].step 406 else: 407 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 408 409 if len(self.e_names) > 1: 410 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 411 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 412 tau_string += f" in units of {gap} config" 413 if gap > 1: 414 tau_string += "s" 415 if self.tau_exp[e_name] > 0: 416 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 417 else: 418 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 419 print(tau_string) 420 for e_name in self.cov_names: 421 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 422 if ens_content is True: 423 if len(self.e_names) == 1: 424 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 425 else: 426 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 427 my_string_list = [] 428 for key, value in sorted(self.e_content.items()): 429 if key not in self.covobs: 430 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 431 if len(value) == 1: 432 my_string += f': {self.shape[value[0]]} configurations' 433 if isinstance(self.idl[value[0]], range): 434 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 435 else: 436 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 437 else: 438 sublist = [] 439 for v in value: 440 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 441 my_substring += f': {self.shape[v]} configurations' 442 if isinstance(self.idl[v], range): 443 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 444 else: 445 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 446 sublist.append(my_substring) 447 448 my_string += '\n' + '\n'.join(sublist) 449 else: 450 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 451 my_string_list.append(my_string) 452 print('\n'.join(my_string_list)) 453 454 def reweight(self, weight): 455 """Reweight the obs with given rewighting factors. 456 457 Parameters 458 ---------- 459 weight : Obs 460 Reweighting factor. An Observable that has to be defined on a superset of the 461 configurations in obs[i].idl for all i. 462 all_configs : bool 463 if True, the reweighted observables are normalized by the average of 464 the reweighting factor on all configurations in weight.idl and not 465 on the configurations in obs[i].idl. Default False. 466 """ 467 return reweight(weight, [self])[0] 468 469 def is_zero_within_error(self, sigma=1): 470 """Checks whether the observable is zero within 'sigma' standard errors. 471 472 Parameters 473 ---------- 474 sigma : int 475 Number of standard errors used for the check. 476 477 Works only properly when the gamma method was run. 478 """ 479 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 480 481 def is_zero(self, atol=1e-10): 482 """Checks whether the observable is zero within a given tolerance. 483 484 Parameters 485 ---------- 486 atol : float 487 Absolute tolerance (for details see numpy documentation). 488 """ 489 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 490 491 def plot_tauint(self, save=None): 492 """Plot integrated autocorrelation time for each ensemble. 493 494 Parameters 495 ---------- 496 save : str 497 saves the figure to a file named 'save' if. 498 """ 499 if not hasattr(self, 'e_dvalue'): 500 raise Exception('Run the gamma method first.') 501 502 for e, e_name in enumerate(self.mc_names): 503 fig = plt.figure() 504 plt.xlabel(r'$W$') 505 plt.ylabel(r'$\tau_\mathrm{int}$') 506 length = int(len(self.e_n_tauint[e_name])) 507 if self.tau_exp[e_name] > 0: 508 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 509 x_help = np.arange(2 * self.tau_exp[e_name]) 510 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 511 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 512 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 513 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 514 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 515 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 516 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 517 else: 518 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 519 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 520 521 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 522 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 523 plt.legend() 524 plt.xlim(-0.5, xmax) 525 ylim = plt.ylim() 526 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 527 plt.draw() 528 if save: 529 fig.savefig(save + "_" + str(e)) 530 531 def plot_rho(self, save=None): 532 """Plot normalized autocorrelation function time for each ensemble. 533 534 Parameters 535 ---------- 536 save : str 537 saves the figure to a file named 'save' if. 538 """ 539 if not hasattr(self, 'e_dvalue'): 540 raise Exception('Run the gamma method first.') 541 for e, e_name in enumerate(self.mc_names): 542 fig = plt.figure() 543 plt.xlabel('W') 544 plt.ylabel('rho') 545 length = int(len(self.e_drho[e_name])) 546 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 547 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 548 if self.tau_exp[e_name] > 0: 549 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 550 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 551 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 552 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 553 else: 554 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 555 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 556 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 557 plt.xlim(-0.5, xmax) 558 plt.draw() 559 if save: 560 fig.savefig(save + "_" + str(e)) 561 562 def plot_rep_dist(self): 563 """Plot replica distribution for each ensemble with more than one replicum.""" 564 if not hasattr(self, 'e_dvalue'): 565 raise Exception('Run the gamma method first.') 566 for e, e_name in enumerate(self.mc_names): 567 if len(self.e_content[e_name]) == 1: 568 print('No replica distribution for a single replicum (', e_name, ')') 569 continue 570 r_length = [] 571 sub_r_mean = 0 572 for r, r_name in enumerate(self.e_content[e_name]): 573 r_length.append(len(self.deltas[r_name])) 574 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 575 e_N = np.sum(r_length) 576 sub_r_mean /= e_N 577 arr = np.zeros(len(self.e_content[e_name])) 578 for r, r_name in enumerate(self.e_content[e_name]): 579 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 580 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 581 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 582 plt.draw() 583 584 def plot_history(self, expand=True): 585 """Plot derived Monte Carlo history for each ensemble 586 587 Parameters 588 ---------- 589 expand : bool 590 show expanded history for irregular Monte Carlo chains (default: True). 591 """ 592 for e, e_name in enumerate(self.mc_names): 593 plt.figure() 594 r_length = [] 595 tmp = [] 596 tmp_expanded = [] 597 for r, r_name in enumerate(self.e_content[e_name]): 598 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 599 if expand: 600 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 601 r_length.append(len(tmp_expanded[-1])) 602 else: 603 r_length.append(len(tmp[-1])) 604 e_N = np.sum(r_length) 605 x = np.arange(e_N) 606 y_test = np.concatenate(tmp, axis=0) 607 if expand: 608 y = np.concatenate(tmp_expanded, axis=0) 609 else: 610 y = y_test 611 plt.errorbar(x, y, fmt='.', markersize=3) 612 plt.xlim(-0.5, e_N - 0.5) 613 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 614 plt.draw() 615 616 def plot_piechart(self, save=None): 617 """Plot piechart which shows the fractional contribution of each 618 ensemble to the error and returns a dictionary containing the fractions. 619 620 Parameters 621 ---------- 622 save : str 623 saves the figure to a file named 'save' if. 624 """ 625 if not hasattr(self, 'e_dvalue'): 626 raise Exception('Run the gamma method first.') 627 if np.isclose(0.0, self._dvalue, atol=1e-15): 628 raise Exception('Error is 0.0') 629 labels = self.e_names 630 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 631 fig1, ax1 = plt.subplots() 632 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 633 ax1.axis('equal') 634 plt.draw() 635 if save: 636 fig1.savefig(save) 637 638 return dict(zip(self.e_names, sizes)) 639 640 def dump(self, filename, datatype="json.gz", description="", **kwargs): 641 """Dump the Obs to a file 'name' of chosen format. 642 643 Parameters 644 ---------- 645 filename : str 646 name of the file to be saved. 647 datatype : str 648 Format of the exported file. Supported formats include 649 "json.gz" and "pickle" 650 description : str 651 Description for output file, only relevant for json.gz format. 652 path : str 653 specifies a custom path for the file (default '.') 654 """ 655 if 'path' in kwargs: 656 file_name = kwargs.get('path') + '/' + filename 657 else: 658 file_name = filename 659 660 if datatype == "json.gz": 661 from .input.json import dump_to_json 662 dump_to_json([self], file_name, description=description) 663 elif datatype == "pickle": 664 with open(file_name + '.p', 'wb') as fb: 665 pickle.dump(self, fb) 666 else: 667 raise Exception("Unknown datatype " + str(datatype)) 668 669 def export_jackknife(self): 670 """Export jackknife samples from the Obs 671 672 Returns 673 ------- 674 numpy.ndarray 675 Returns a numpy array of length N + 1 where N is the number of samples 676 for the given ensemble and replicum. The zeroth entry of the array contains 677 the mean value of the Obs, entries 1 to N contain the N jackknife samples 678 derived from the Obs. The current implementation only works for observables 679 defined on exactly one ensemble and replicum. The derived jackknife samples 680 should agree with samples from a full jackknife analysis up to O(1/N). 681 """ 682 683 if len(self.names) != 1: 684 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 685 686 name = self.names[0] 687 full_data = self.deltas[name] + self.r_values[name] 688 n = full_data.size 689 mean = self.value 690 tmp_jacks = np.zeros(n + 1) 691 tmp_jacks[0] = mean 692 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 693 return tmp_jacks 694 695 def __float__(self): 696 return float(self.value) 697 698 def __repr__(self): 699 return 'Obs[' + str(self) + ']' 700 701 def __str__(self): 702 if self._dvalue == 0.0: 703 return str(self.value) 704 return _format_uncertainty(self.value, self._dvalue) 705 706 def __hash__(self): 707 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 708 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 709 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 710 hash_tuple += tuple([o.encode() for o in self.names]) 711 m = hashlib.md5() 712 [m.update(o) for o in hash_tuple] 713 return int(m.hexdigest(), 16) & 0xFFFFFFFF 714 715 # Overload comparisons 716 def __lt__(self, other): 717 return self.value < other 718 719 def __le__(self, other): 720 return self.value <= other 721 722 def __gt__(self, other): 723 return self.value > other 724 725 def __ge__(self, other): 726 return self.value >= other 727 728 def __eq__(self, other): 729 return (self - other).is_zero() 730 731 def __ne__(self, other): 732 return not (self - other).is_zero() 733 734 # Overload math operations 735 def __add__(self, y): 736 if isinstance(y, Obs): 737 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 738 else: 739 if isinstance(y, np.ndarray): 740 return np.array([self + o for o in y]) 741 elif y.__class__.__name__ in ['Corr', 'CObs']: 742 return NotImplemented 743 else: 744 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 745 746 def __radd__(self, y): 747 return self + y 748 749 def __mul__(self, y): 750 if isinstance(y, Obs): 751 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 752 else: 753 if isinstance(y, np.ndarray): 754 return np.array([self * o for o in y]) 755 elif isinstance(y, complex): 756 return CObs(self * y.real, self * y.imag) 757 elif y.__class__.__name__ in ['Corr', 'CObs']: 758 return NotImplemented 759 else: 760 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 761 762 def __rmul__(self, y): 763 return self * y 764 765 def __sub__(self, y): 766 if isinstance(y, Obs): 767 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 768 else: 769 if isinstance(y, np.ndarray): 770 return np.array([self - o for o in y]) 771 elif y.__class__.__name__ in ['Corr', 'CObs']: 772 return NotImplemented 773 else: 774 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 775 776 def __rsub__(self, y): 777 return -1 * (self - y) 778 779 def __pos__(self): 780 return self 781 782 def __neg__(self): 783 return -1 * self 784 785 def __truediv__(self, y): 786 if isinstance(y, Obs): 787 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 788 else: 789 if isinstance(y, np.ndarray): 790 return np.array([self / o for o in y]) 791 elif y.__class__.__name__ in ['Corr', 'CObs']: 792 return NotImplemented 793 else: 794 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 795 796 def __rtruediv__(self, y): 797 if isinstance(y, Obs): 798 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 799 else: 800 if isinstance(y, np.ndarray): 801 return np.array([o / self for o in y]) 802 elif y.__class__.__name__ in ['Corr', 'CObs']: 803 return NotImplemented 804 else: 805 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 806 807 def __pow__(self, y): 808 if isinstance(y, Obs): 809 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 810 else: 811 return derived_observable(lambda x: x[0] ** y, [self]) 812 813 def __rpow__(self, y): 814 if isinstance(y, Obs): 815 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 816 else: 817 return derived_observable(lambda x: y ** x[0], [self]) 818 819 def __abs__(self): 820 return derived_observable(lambda x: anp.abs(x[0]), [self]) 821 822 # Overload numpy functions 823 def sqrt(self): 824 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 825 826 def log(self): 827 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 828 829 def exp(self): 830 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 831 832 def sin(self): 833 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 834 835 def cos(self): 836 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 837 838 def tan(self): 839 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 840 841 def arcsin(self): 842 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 843 844 def arccos(self): 845 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 846 847 def arctan(self): 848 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 849 850 def sinh(self): 851 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 852 853 def cosh(self): 854 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 855 856 def tanh(self): 857 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 858 859 def arcsinh(self): 860 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 861 862 def arccosh(self): 863 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 864 865 def arctanh(self): 866 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 867 868 869class CObs: 870 """Class for a complex valued observable.""" 871 __slots__ = ['_real', '_imag', 'tag'] 872 873 def __init__(self, real, imag=0.0): 874 self._real = real 875 self._imag = imag 876 self.tag = None 877 878 @property 879 def real(self): 880 return self._real 881 882 @property 883 def imag(self): 884 return self._imag 885 886 def gamma_method(self, **kwargs): 887 """Executes the gamma_method for the real and the imaginary part.""" 888 if isinstance(self.real, Obs): 889 self.real.gamma_method(**kwargs) 890 if isinstance(self.imag, Obs): 891 self.imag.gamma_method(**kwargs) 892 893 def is_zero(self): 894 """Checks whether both real and imaginary part are zero within machine precision.""" 895 return self.real == 0.0 and self.imag == 0.0 896 897 def conjugate(self): 898 return CObs(self.real, -self.imag) 899 900 def __add__(self, other): 901 if isinstance(other, np.ndarray): 902 return other + self 903 elif hasattr(other, 'real') and hasattr(other, 'imag'): 904 return CObs(self.real + other.real, 905 self.imag + other.imag) 906 else: 907 return CObs(self.real + other, self.imag) 908 909 def __radd__(self, y): 910 return self + y 911 912 def __sub__(self, other): 913 if isinstance(other, np.ndarray): 914 return -1 * (other - self) 915 elif hasattr(other, 'real') and hasattr(other, 'imag'): 916 return CObs(self.real - other.real, self.imag - other.imag) 917 else: 918 return CObs(self.real - other, self.imag) 919 920 def __rsub__(self, other): 921 return -1 * (self - other) 922 923 def __mul__(self, other): 924 if isinstance(other, np.ndarray): 925 return other * self 926 elif hasattr(other, 'real') and hasattr(other, 'imag'): 927 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 928 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 929 [self.real, other.real, self.imag, other.imag], 930 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 931 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 932 [self.real, other.real, self.imag, other.imag], 933 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 934 elif getattr(other, 'imag', 0) != 0: 935 return CObs(self.real * other.real - self.imag * other.imag, 936 self.imag * other.real + self.real * other.imag) 937 else: 938 return CObs(self.real * other.real, self.imag * other.real) 939 else: 940 return CObs(self.real * other, self.imag * other) 941 942 def __rmul__(self, other): 943 return self * other 944 945 def __truediv__(self, other): 946 if isinstance(other, np.ndarray): 947 return 1 / (other / self) 948 elif hasattr(other, 'real') and hasattr(other, 'imag'): 949 r = other.real ** 2 + other.imag ** 2 950 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 951 else: 952 return CObs(self.real / other, self.imag / other) 953 954 def __rtruediv__(self, other): 955 r = self.real ** 2 + self.imag ** 2 956 if hasattr(other, 'real') and hasattr(other, 'imag'): 957 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 958 else: 959 return CObs(self.real * other / r, -self.imag * other / r) 960 961 def __abs__(self): 962 return np.sqrt(self.real**2 + self.imag**2) 963 964 def __pos__(self): 965 return self 966 967 def __neg__(self): 968 return -1 * self 969 970 def __eq__(self, other): 971 return self.real == other.real and self.imag == other.imag 972 973 def __str__(self): 974 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 975 976 def __repr__(self): 977 return 'CObs[' + str(self) + ']' 978 979 980def _format_uncertainty(value, dvalue): 981 """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" 982 fexp = np.floor(np.log10(dvalue)) 983 if fexp < 0.0: 984 return '{:{form}}({:2.0f})'.format(value, dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 985 elif fexp == 0.0: 986 return '{:.1f}({:1.1f})'.format(value, dvalue) 987 else: 988 return '{:.0f}({:2.0f})'.format(value, dvalue) 989 990 991def _expand_deltas(deltas, idx, shape): 992 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 993 If idx is of type range, the deltas are not changed 994 995 Parameters 996 ---------- 997 deltas : list 998 List of fluctuations 999 idx : list 1000 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 1001 shape : int 1002 Number of configs in idx. 1003 """ 1004 if isinstance(idx, range): 1005 return deltas 1006 else: 1007 ret = np.zeros(idx[-1] - idx[0] + 1) 1008 for i in range(shape): 1009 ret[idx[i] - idx[0]] = deltas[i] 1010 return ret 1011 1012 1013def _merge_idx(idl): 1014 """Returns the union of all lists in idl as sorted list 1015 1016 Parameters 1017 ---------- 1018 idl : list 1019 List of lists or ranges. 1020 """ 1021 1022 # Use groupby to efficiently check whether all elements of idl are identical 1023 try: 1024 g = groupby(idl) 1025 if next(g, True) and not next(g, False): 1026 return idl[0] 1027 except Exception: 1028 pass 1029 1030 if np.all([type(idx) is range for idx in idl]): 1031 if len(set([idx[0] for idx in idl])) == 1: 1032 idstart = min([idx.start for idx in idl]) 1033 idstop = max([idx.stop for idx in idl]) 1034 idstep = min([idx.step for idx in idl]) 1035 return range(idstart, idstop, idstep) 1036 1037 return sorted(set().union(*idl)) 1038 1039 1040def _intersection_idx(idl): 1041 """Returns the intersection of all lists in idl as sorted list 1042 1043 Parameters 1044 ---------- 1045 idl : list 1046 List of lists or ranges. 1047 """ 1048 1049 def _lcm(*args): 1050 """Returns the lowest common multiple of args. 1051 1052 From python 3.9 onwards the math library contains an lcm function.""" 1053 return reduce(lambda a, b: a * b // gcd(a, b), args) 1054 1055 # Use groupby to efficiently check whether all elements of idl are identical 1056 try: 1057 g = groupby(idl) 1058 if next(g, True) and not next(g, False): 1059 return idl[0] 1060 except Exception: 1061 pass 1062 1063 if np.all([type(idx) is range for idx in idl]): 1064 if len(set([idx[0] for idx in idl])) == 1: 1065 idstart = max([idx.start for idx in idl]) 1066 idstop = min([idx.stop for idx in idl]) 1067 idstep = _lcm(*[idx.step for idx in idl]) 1068 return range(idstart, idstop, idstep) 1069 1070 return sorted(set.intersection(*[set(o) for o in idl])) 1071 1072 1073def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1074 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1075 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1076 common divisor of the step sizes is used as new step size. 1077 1078 Parameters 1079 ---------- 1080 deltas : list 1081 List of fluctuations 1082 idx : list 1083 List or range of configs on which the deltas are defined. 1084 Has to be a subset of new_idx and has to be sorted in ascending order. 1085 shape : list 1086 Number of configs in idx. 1087 new_idx : list 1088 List of configs that defines the new range, has to be sorted in ascending order. 1089 """ 1090 1091 if type(idx) is range and type(new_idx) is range: 1092 if idx == new_idx: 1093 return deltas 1094 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1095 for i in range(shape): 1096 ret[idx[i] - new_idx[0]] = deltas[i] 1097 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1098 1099 1100def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): 1101 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. 1102 If idx and new_idx are of type range, the smallest 1103 common divisor of the step sizes is used as new step size. 1104 1105 Parameters 1106 ---------- 1107 deltas : list 1108 List of fluctuations 1109 idx : list 1110 List or range of configs on which the deltas are defined. 1111 Has to be a subset of new_idx and has to be sorted in ascending order. 1112 shape : list 1113 Number of configs in idx. 1114 new_idx : list 1115 List of configs that defines the new range, has to be sorted in ascending order. 1116 """ 1117 1118 if type(idx) is range and type(new_idx) is range: 1119 if idx == new_idx: 1120 return deltas 1121 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1122 for i in range(shape): 1123 if idx[i] in new_idx: 1124 ret[idx[i] - new_idx[0]] = deltas[i] 1125 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1126 1127 1128def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1129 """Filter out all configurations with vanishing fluctuation such that they do not 1130 contribute to the error estimate anymore. Returns the new deltas and 1131 idx according to the filtering. 1132 A fluctuation is considered to be vanishing, if it is smaller than eps times 1133 the mean of the absolute values of all deltas in one list. 1134 1135 Parameters 1136 ---------- 1137 deltas : list 1138 List of fluctuations 1139 idx : list 1140 List or ranges of configs on which the deltas are defined. 1141 eps : float 1142 Prefactor that enters the filter criterion. 1143 """ 1144 new_deltas = [] 1145 new_idx = [] 1146 maxd = np.mean(np.fabs(deltas)) 1147 for i in range(len(deltas)): 1148 if abs(deltas[i]) > eps * maxd: 1149 new_deltas.append(deltas[i]) 1150 new_idx.append(idx[i]) 1151 if new_idx: 1152 return np.array(new_deltas), new_idx 1153 else: 1154 return deltas, idx 1155 1156 1157def derived_observable(func, data, array_mode=False, **kwargs): 1158 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1159 1160 Parameters 1161 ---------- 1162 func : object 1163 arbitrary function of the form func(data, **kwargs). For the 1164 automatic differentiation to work, all numpy functions have to have 1165 the autograd wrapper (use 'import autograd.numpy as anp'). 1166 data : list 1167 list of Obs, e.g. [obs1, obs2, obs3]. 1168 num_grad : bool 1169 if True, numerical derivatives are used instead of autograd 1170 (default False). To control the numerical differentiation the 1171 kwargs of numdifftools.step_generators.MaxStepGenerator 1172 can be used. 1173 man_grad : list 1174 manually supply a list or an array which contains the jacobian 1175 of func. Use cautiously, supplying the wrong derivative will 1176 not be intercepted. 1177 1178 Notes 1179 ----- 1180 For simple mathematical operations it can be practical to use anonymous 1181 functions. For the ratio of two observables one can e.g. use 1182 1183 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1184 """ 1185 1186 data = np.asarray(data) 1187 raveled_data = data.ravel() 1188 1189 # Workaround for matrix operations containing non Obs data 1190 if not all(isinstance(x, Obs) for x in raveled_data): 1191 for i in range(len(raveled_data)): 1192 if isinstance(raveled_data[i], (int, float)): 1193 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1194 1195 allcov = {} 1196 for o in raveled_data: 1197 for name in o.cov_names: 1198 if name in allcov: 1199 if not np.allclose(allcov[name], o.covobs[name].cov): 1200 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1201 else: 1202 allcov[name] = o.covobs[name].cov 1203 1204 n_obs = len(raveled_data) 1205 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1206 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1207 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1208 1209 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1210 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1211 1212 if data.ndim == 1: 1213 values = np.array([o.value for o in data]) 1214 else: 1215 values = np.vectorize(lambda x: x.value)(data) 1216 1217 new_values = func(values, **kwargs) 1218 1219 multi = int(isinstance(new_values, np.ndarray)) 1220 1221 new_r_values = {} 1222 new_idl_d = {} 1223 for name in new_sample_names: 1224 idl = [] 1225 tmp_values = np.zeros(n_obs) 1226 for i, item in enumerate(raveled_data): 1227 tmp_values[i] = item.r_values.get(name, item.value) 1228 tmp_idl = item.idl.get(name) 1229 if tmp_idl is not None: 1230 idl.append(tmp_idl) 1231 if multi > 0: 1232 tmp_values = np.array(tmp_values).reshape(data.shape) 1233 new_r_values[name] = func(tmp_values, **kwargs) 1234 new_idl_d[name] = _merge_idx(idl) 1235 if not is_merged[name]: 1236 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1237 1238 if 'man_grad' in kwargs: 1239 deriv = np.asarray(kwargs.get('man_grad')) 1240 if new_values.shape + data.shape != deriv.shape: 1241 raise Exception('Manual derivative does not have correct shape.') 1242 elif kwargs.get('num_grad') is True: 1243 if multi > 0: 1244 raise Exception('Multi mode currently not supported for numerical derivative') 1245 options = { 1246 'base_step': 0.1, 1247 'step_ratio': 2.5} 1248 for key in options.keys(): 1249 kwarg = kwargs.get(key) 1250 if kwarg is not None: 1251 options[key] = kwarg 1252 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1253 if tmp_df.size == 1: 1254 deriv = np.array([tmp_df.real]) 1255 else: 1256 deriv = tmp_df.real 1257 else: 1258 deriv = jacobian(func)(values, **kwargs) 1259 1260 final_result = np.zeros(new_values.shape, dtype=object) 1261 1262 if array_mode is True: 1263 1264 class _Zero_grad(): 1265 def __init__(self, N): 1266 self.grad = np.zeros((N, 1)) 1267 1268 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1269 d_extracted = {} 1270 g_extracted = {} 1271 for name in new_sample_names: 1272 d_extracted[name] = [] 1273 ens_length = len(new_idl_d[name]) 1274 for i_dat, dat in enumerate(data): 1275 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1276 for name in new_cov_names: 1277 g_extracted[name] = [] 1278 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1279 for i_dat, dat in enumerate(data): 1280 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1281 1282 for i_val, new_val in np.ndenumerate(new_values): 1283 new_deltas = {} 1284 new_grad = {} 1285 if array_mode is True: 1286 for name in new_sample_names: 1287 ens_length = d_extracted[name][0].shape[-1] 1288 new_deltas[name] = np.zeros(ens_length) 1289 for i_dat, dat in enumerate(d_extracted[name]): 1290 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1291 for name in new_cov_names: 1292 new_grad[name] = 0 1293 for i_dat, dat in enumerate(g_extracted[name]): 1294 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1295 else: 1296 for j_obs, obs in np.ndenumerate(data): 1297 for name in obs.names: 1298 if name in obs.cov_names: 1299 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1300 else: 1301 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1302 1303 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1304 1305 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1306 raise Exception('The same name has been used for deltas and covobs!') 1307 new_samples = [] 1308 new_means = [] 1309 new_idl = [] 1310 new_names_obs = [] 1311 for name in new_names: 1312 if name not in new_covobs: 1313 if is_merged[name]: 1314 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1315 else: 1316 filtered_deltas = new_deltas[name] 1317 filtered_idl_d = new_idl_d[name] 1318 1319 new_samples.append(filtered_deltas) 1320 new_idl.append(filtered_idl_d) 1321 new_means.append(new_r_values[name][i_val]) 1322 new_names_obs.append(name) 1323 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1324 for name in new_covobs: 1325 final_result[i_val].names.append(name) 1326 final_result[i_val]._covobs = new_covobs 1327 final_result[i_val]._value = new_val 1328 final_result[i_val].is_merged = is_merged 1329 final_result[i_val].reweighted = reweighted 1330 1331 if multi == 0: 1332 final_result = final_result.item() 1333 1334 return final_result 1335 1336 1337def _reduce_deltas(deltas, idx_old, idx_new): 1338 """Extract deltas defined on idx_old on all configs of idx_new. 1339 1340 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1341 are ordered in an ascending order. 1342 1343 Parameters 1344 ---------- 1345 deltas : list 1346 List of fluctuations 1347 idx_old : list 1348 List or range of configs on which the deltas are defined 1349 idx_new : list 1350 List of configs for which we want to extract the deltas. 1351 Has to be a subset of idx_old. 1352 """ 1353 if not len(deltas) == len(idx_old): 1354 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1355 if type(idx_old) is range and type(idx_new) is range: 1356 if idx_old == idx_new: 1357 return deltas 1358 shape = len(idx_new) 1359 ret = np.zeros(shape) 1360 oldpos = 0 1361 for i in range(shape): 1362 pos = -1 1363 for j in range(oldpos, len(idx_old)): 1364 if idx_old[j] == idx_new[i]: 1365 pos = j 1366 break 1367 if pos < 0: 1368 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1369 ret[i] = deltas[pos] 1370 oldpos = pos 1371 return np.array(ret) 1372 1373 1374def reweight(weight, obs, **kwargs): 1375 """Reweight a list of observables. 1376 1377 Parameters 1378 ---------- 1379 weight : Obs 1380 Reweighting factor. An Observable that has to be defined on a superset of the 1381 configurations in obs[i].idl for all i. 1382 obs : list 1383 list of Obs, e.g. [obs1, obs2, obs3]. 1384 all_configs : bool 1385 if True, the reweighted observables are normalized by the average of 1386 the reweighting factor on all configurations in weight.idl and not 1387 on the configurations in obs[i].idl. Default False. 1388 """ 1389 result = [] 1390 for i in range(len(obs)): 1391 if len(obs[i].cov_names): 1392 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1393 if not set(obs[i].names).issubset(weight.names): 1394 raise Exception('Error: Ensembles do not fit') 1395 for name in obs[i].names: 1396 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1397 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1398 new_samples = [] 1399 w_deltas = {} 1400 for name in sorted(obs[i].names): 1401 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1402 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1403 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1404 1405 if kwargs.get('all_configs'): 1406 new_weight = weight 1407 else: 1408 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1409 1410 result.append(tmp_obs / new_weight) 1411 result[-1].reweighted = True 1412 result[-1].is_merged = obs[i].is_merged 1413 1414 return result 1415 1416 1417def correlate(obs_a, obs_b): 1418 """Correlate two observables. 1419 1420 Parameters 1421 ---------- 1422 obs_a : Obs 1423 First observable 1424 obs_b : Obs 1425 Second observable 1426 1427 Notes 1428 ----- 1429 Keep in mind to only correlate primary observables which have not been reweighted 1430 yet. The reweighting has to be applied after correlating the observables. 1431 Currently only works if ensembles are identical (this is not strictly necessary). 1432 """ 1433 1434 if sorted(obs_a.names) != sorted(obs_b.names): 1435 raise Exception('Ensembles do not fit') 1436 if len(obs_a.cov_names) or len(obs_b.cov_names): 1437 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1438 for name in obs_a.names: 1439 if obs_a.shape[name] != obs_b.shape[name]: 1440 raise Exception('Shapes of ensemble', name, 'do not fit') 1441 if obs_a.idl[name] != obs_b.idl[name]: 1442 raise Exception('idl of ensemble', name, 'do not fit') 1443 1444 if obs_a.reweighted is True: 1445 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1446 if obs_b.reweighted is True: 1447 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1448 1449 new_samples = [] 1450 new_idl = [] 1451 for name in sorted(obs_a.names): 1452 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1453 new_idl.append(obs_a.idl[name]) 1454 1455 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1456 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1457 o.reweighted = obs_a.reweighted or obs_b.reweighted 1458 return o 1459 1460 1461def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1462 r'''Calculates the error covariance matrix of a set of observables. 1463 1464 The gamma method has to be applied first to all observables. 1465 1466 Parameters 1467 ---------- 1468 obs : list or numpy.ndarray 1469 List or one dimensional array of Obs 1470 visualize : bool 1471 If True plots the corresponding normalized correlation matrix (default False). 1472 correlation : bool 1473 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1474 smooth : None or int 1475 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1476 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1477 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1478 small ones. 1479 1480 Notes 1481 ----- 1482 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1483 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1484 in the absence of autocorrelation. 1485 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1486 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1487 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1488 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1489 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1490 ''' 1491 1492 length = len(obs) 1493 1494 max_samples = np.max([o.N for o in obs]) 1495 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1496 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1497 1498 cov = np.zeros((length, length)) 1499 for i in range(length): 1500 for j in range(i, length): 1501 cov[i, j] = _covariance_element(obs[i], obs[j]) 1502 cov = cov + cov.T - np.diag(np.diag(cov)) 1503 1504 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1505 1506 if isinstance(smooth, int): 1507 corr = _smooth_eigenvalues(corr, smooth) 1508 1509 if visualize: 1510 plt.matshow(corr, vmin=-1, vmax=1) 1511 plt.set_cmap('RdBu') 1512 plt.colorbar() 1513 plt.draw() 1514 1515 if correlation is True: 1516 return corr 1517 1518 errors = [o.dvalue for o in obs] 1519 cov = np.diag(errors) @ corr @ np.diag(errors) 1520 1521 eigenvalues = np.linalg.eigh(cov)[0] 1522 if not np.all(eigenvalues >= 0): 1523 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1524 1525 return cov 1526 1527 1528def _smooth_eigenvalues(corr, E): 1529 """Eigenvalue smoothing as described in hep-lat/9412087 1530 1531 corr : np.ndarray 1532 correlation matrix 1533 E : integer 1534 Number of eigenvalues to be left substantially unchanged 1535 """ 1536 if not (2 < E < corr.shape[0] - 1): 1537 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1538 vals, vec = np.linalg.eigh(corr) 1539 lambda_min = np.mean(vals[:-E]) 1540 vals[vals < lambda_min] = lambda_min 1541 vals /= np.mean(vals) 1542 return vec @ np.diag(vals) @ vec.T 1543 1544 1545def _covariance_element(obs1, obs2): 1546 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1547 1548 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1549 deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1550 deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1551 return np.sum(deltas1 * deltas2) 1552 1553 if set(obs1.names).isdisjoint(set(obs2.names)): 1554 return 0.0 1555 1556 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1557 raise Exception('The gamma method has to be applied to both Obs first.') 1558 1559 dvalue = 0.0 1560 1561 for e_name in obs1.mc_names: 1562 1563 if e_name not in obs2.mc_names: 1564 continue 1565 1566 idl_d = {} 1567 for r_name in obs1.e_content[e_name]: 1568 if r_name not in obs2.e_content[e_name]: 1569 continue 1570 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1571 1572 gamma = 0.0 1573 1574 for r_name in obs1.e_content[e_name]: 1575 if r_name not in obs2.e_content[e_name]: 1576 continue 1577 if len(idl_d[r_name]) == 0: 1578 continue 1579 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1580 1581 if gamma == 0.0: 1582 continue 1583 1584 gamma_div = 0.0 1585 for r_name in obs1.e_content[e_name]: 1586 if r_name not in obs2.e_content[e_name]: 1587 continue 1588 if len(idl_d[r_name]) == 0: 1589 continue 1590 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1591 gamma /= gamma_div 1592 1593 dvalue += gamma 1594 1595 for e_name in obs1.cov_names: 1596 1597 if e_name not in obs2.cov_names: 1598 continue 1599 1600 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1601 1602 return dvalue 1603 1604 1605def import_jackknife(jacks, name, idl=None): 1606 """Imports jackknife samples and returns an Obs 1607 1608 Parameters 1609 ---------- 1610 jacks : numpy.ndarray 1611 numpy array containing the mean value as zeroth entry and 1612 the N jackknife samples as first to Nth entry. 1613 name : str 1614 name of the ensemble the samples are defined on. 1615 """ 1616 length = len(jacks) - 1 1617 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1618 samples = jacks[1:] @ prj 1619 mean = np.mean(samples) 1620 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1621 new_obs._value = jacks[0] 1622 return new_obs 1623 1624 1625def merge_obs(list_of_obs): 1626 """Combine all observables in list_of_obs into one new observable 1627 1628 Parameters 1629 ---------- 1630 list_of_obs : list 1631 list of the Obs object to be combined 1632 1633 Notes 1634 ----- 1635 It is not possible to combine obs which are based on the same replicum 1636 """ 1637 replist = [item for obs in list_of_obs for item in obs.names] 1638 if (len(replist) == len(set(replist))) is False: 1639 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1640 if any([len(o.cov_names) for o in list_of_obs]): 1641 raise Exception('Not possible to merge data that contains covobs!') 1642 new_dict = {} 1643 idl_dict = {} 1644 for o in list_of_obs: 1645 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1646 for key in set(o.deltas) | set(o.r_values)}) 1647 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1648 1649 names = sorted(new_dict.keys()) 1650 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1651 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1652 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1653 return o 1654 1655 1656def cov_Obs(means, cov, name, grad=None): 1657 """Create an Obs based on mean(s) and a covariance matrix 1658 1659 Parameters 1660 ---------- 1661 mean : list of floats or float 1662 N mean value(s) of the new Obs 1663 cov : list or array 1664 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1665 name : str 1666 identifier for the covariance matrix 1667 grad : list or array 1668 Gradient of the Covobs wrt. the means belonging to cov. 1669 """ 1670 1671 def covobs_to_obs(co): 1672 """Make an Obs out of a Covobs 1673 1674 Parameters 1675 ---------- 1676 co : Covobs 1677 Covobs to be embedded into the Obs 1678 """ 1679 o = Obs([], [], means=[]) 1680 o._value = co.value 1681 o.names.append(co.name) 1682 o._covobs[co.name] = co 1683 o._dvalue = np.sqrt(co.errsq()) 1684 return o 1685 1686 ol = [] 1687 if isinstance(means, (float, int)): 1688 means = [means] 1689 1690 for i in range(len(means)): 1691 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1692 if ol[0].covobs[name].N != len(means): 1693 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1694 if len(ol) == 1: 1695 return ol[0] 1696 return ol
17class Obs: 18 """Class for a general observable. 19 20 Instances of Obs are the basic objects of a pyerrors error analysis. 21 They are initialized with a list which contains arrays of samples for 22 different ensembles/replica and another list of same length which contains 23 the names of the ensembles/replica. Mathematical operations can be 24 performed on instances. The result is another instance of Obs. The error of 25 an instance can be computed with the gamma_method. Also contains additional 26 methods for output and visualization of the error calculation. 27 28 Attributes 29 ---------- 30 S_global : float 31 Standard value for S (default 2.0) 32 S_dict : dict 33 Dictionary for S values. If an entry for a given ensemble 34 exists this overwrites the standard value for that ensemble. 35 tau_exp_global : float 36 Standard value for tau_exp (default 0.0) 37 tau_exp_dict : dict 38 Dictionary for tau_exp values. If an entry for a given ensemble exists 39 this overwrites the standard value for that ensemble. 40 N_sigma_global : float 41 Standard value for N_sigma (default 1.0) 42 N_sigma_dict : dict 43 Dictionary for N_sigma values. If an entry for a given ensemble exists 44 this overwrites the standard value for that ensemble. 45 """ 46 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 47 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 48 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 49 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 50 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 51 52 S_global = 2.0 53 S_dict = {} 54 tau_exp_global = 0.0 55 tau_exp_dict = {} 56 N_sigma_global = 1.0 57 N_sigma_dict = {} 58 filter_eps = 1e-10 59 60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise Exception('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise Exception('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise Exception('names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise Exception('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.is_merged = {} 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise Exception("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise Exception('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None 141 142 @property 143 def value(self): 144 return self._value 145 146 @property 147 def dvalue(self): 148 return self._dvalue 149 150 @property 151 def e_names(self): 152 return sorted(set([o.split('|')[0] for o in self.names])) 153 154 @property 155 def cov_names(self): 156 return sorted(set([o for o in self.covobs.keys()])) 157 158 @property 159 def mc_names(self): 160 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 161 162 @property 163 def e_content(self): 164 res = {} 165 for e, e_name in enumerate(self.e_names): 166 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 167 if e_name in self.names: 168 res[e_name].append(e_name) 169 return res 170 171 @property 172 def covobs(self): 173 return self._covobs 174 175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 gaps = [] 272 for r_name in e_content[e_name]: 273 if isinstance(self.idl[r_name], range): 274 gaps.append(1) 275 else: 276 gaps.append(np.min(np.diff(self.idl[r_name]))) 277 278 if not np.all([gi == gaps[0] for gi in gaps]): 279 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 280 else: 281 gapsize = gaps[0] 282 283 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 284 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 285 # Make sure no entry of tauint is smaller than 0.5 286 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 287 # hep-lat/0306017 eq. (42) 288 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 289 self.e_n_dtauint[e_name][0] = 0.0 290 291 def _compute_drho(i): 292 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 293 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 294 295 _compute_drho(gapsize) 296 if self.tau_exp[e_name] > 0: 297 texp = self.tau_exp[e_name] 298 # Critical slowing down analysis 299 if w_max // 2 <= 1: 300 raise Exception("Need at least 8 samples for tau_exp error analysis") 301 for n in range(gapsize, w_max // 2, gapsize): 302 _compute_drho(n + gapsize) 303 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 304 # Bias correction hep-lat/0306017 eq. (49) included 305 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 306 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 307 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 308 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 310 self.e_windowsize[e_name] = n 311 break 312 else: 313 if self.S[e_name] == 0.0: 314 self.e_tauint[e_name] = 0.5 315 self.e_dtauint[e_name] = 0.0 316 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 318 self.e_windowsize[e_name] = 0 319 else: 320 # Standard automatic windowing procedure 321 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 322 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 323 for n in range(1, w_max): 324 if n < w_max // 2 - 2: 325 _compute_drho(gapsize * n + gapsize) 326 if g_w[n - 1] < 0 or n >= w_max - 1: 327 n *= gapsize 328 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 329 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 330 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 331 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 332 self.e_windowsize[e_name] = n 333 break 334 335 self._dvalue += self.e_dvalue[e_name] ** 2 336 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 337 338 for e_name in self.cov_names: 339 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 340 self.e_ddvalue[e_name] = 0 341 self._dvalue += self.e_dvalue[e_name]**2 342 343 self._dvalue = np.sqrt(self._dvalue) 344 if self._dvalue == 0.0: 345 self.ddvalue = 0.0 346 else: 347 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 348 return 349 350 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 351 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 352 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 353 354 Parameters 355 ---------- 356 deltas : list 357 List of fluctuations 358 idx : list 359 List or range of configurations on which the deltas are defined. 360 shape : int 361 Number of configurations in idx. 362 w_max : int 363 Upper bound for the summation window. 364 fft : bool 365 determines whether the fft algorithm is used for the computation 366 of the autocorrelation function. 367 """ 368 gamma = np.zeros(w_max) 369 deltas = _expand_deltas(deltas, idx, shape) 370 new_shape = len(deltas) 371 if fft: 372 max_gamma = min(new_shape, w_max) 373 # The padding for the fft has to be even 374 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 375 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 376 else: 377 for n in range(w_max): 378 if new_shape - n >= 0: 379 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 380 381 return gamma 382 383 def details(self, ens_content=True): 384 """Output detailed properties of the Obs. 385 386 Parameters 387 ---------- 388 ens_content : bool 389 print details about the ensembles and replica if true. 390 """ 391 if self.tag is not None: 392 print("Description:", self.tag) 393 if not hasattr(self, 'e_dvalue'): 394 print('Result\t %3.8e' % (self.value)) 395 else: 396 if self.value == 0.0: 397 percentage = np.nan 398 else: 399 percentage = np.abs(self._dvalue / self.value) * 100 400 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 401 if len(self.e_names) > 1: 402 print(' Ensemble errors:') 403 e_content = self.e_content 404 for e_name in self.mc_names: 405 if isinstance(self.idl[e_content[e_name][0]], range): 406 gap = self.idl[e_content[e_name][0]].step 407 else: 408 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 409 410 if len(self.e_names) > 1: 411 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 412 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 413 tau_string += f" in units of {gap} config" 414 if gap > 1: 415 tau_string += "s" 416 if self.tau_exp[e_name] > 0: 417 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 418 else: 419 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 420 print(tau_string) 421 for e_name in self.cov_names: 422 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 423 if ens_content is True: 424 if len(self.e_names) == 1: 425 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 426 else: 427 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 428 my_string_list = [] 429 for key, value in sorted(self.e_content.items()): 430 if key not in self.covobs: 431 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 432 if len(value) == 1: 433 my_string += f': {self.shape[value[0]]} configurations' 434 if isinstance(self.idl[value[0]], range): 435 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 436 else: 437 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 438 else: 439 sublist = [] 440 for v in value: 441 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 442 my_substring += f': {self.shape[v]} configurations' 443 if isinstance(self.idl[v], range): 444 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 445 else: 446 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 447 sublist.append(my_substring) 448 449 my_string += '\n' + '\n'.join(sublist) 450 else: 451 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 452 my_string_list.append(my_string) 453 print('\n'.join(my_string_list)) 454 455 def reweight(self, weight): 456 """Reweight the obs with given rewighting factors. 457 458 Parameters 459 ---------- 460 weight : Obs 461 Reweighting factor. An Observable that has to be defined on a superset of the 462 configurations in obs[i].idl for all i. 463 all_configs : bool 464 if True, the reweighted observables are normalized by the average of 465 the reweighting factor on all configurations in weight.idl and not 466 on the configurations in obs[i].idl. Default False. 467 """ 468 return reweight(weight, [self])[0] 469 470 def is_zero_within_error(self, sigma=1): 471 """Checks whether the observable is zero within 'sigma' standard errors. 472 473 Parameters 474 ---------- 475 sigma : int 476 Number of standard errors used for the check. 477 478 Works only properly when the gamma method was run. 479 """ 480 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 481 482 def is_zero(self, atol=1e-10): 483 """Checks whether the observable is zero within a given tolerance. 484 485 Parameters 486 ---------- 487 atol : float 488 Absolute tolerance (for details see numpy documentation). 489 """ 490 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 491 492 def plot_tauint(self, save=None): 493 """Plot integrated autocorrelation time for each ensemble. 494 495 Parameters 496 ---------- 497 save : str 498 saves the figure to a file named 'save' if. 499 """ 500 if not hasattr(self, 'e_dvalue'): 501 raise Exception('Run the gamma method first.') 502 503 for e, e_name in enumerate(self.mc_names): 504 fig = plt.figure() 505 plt.xlabel(r'$W$') 506 plt.ylabel(r'$\tau_\mathrm{int}$') 507 length = int(len(self.e_n_tauint[e_name])) 508 if self.tau_exp[e_name] > 0: 509 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 510 x_help = np.arange(2 * self.tau_exp[e_name]) 511 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 512 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 513 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 514 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 515 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 516 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 517 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 518 else: 519 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 520 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 521 522 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 523 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 524 plt.legend() 525 plt.xlim(-0.5, xmax) 526 ylim = plt.ylim() 527 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 528 plt.draw() 529 if save: 530 fig.savefig(save + "_" + str(e)) 531 532 def plot_rho(self, save=None): 533 """Plot normalized autocorrelation function time for each ensemble. 534 535 Parameters 536 ---------- 537 save : str 538 saves the figure to a file named 'save' if. 539 """ 540 if not hasattr(self, 'e_dvalue'): 541 raise Exception('Run the gamma method first.') 542 for e, e_name in enumerate(self.mc_names): 543 fig = plt.figure() 544 plt.xlabel('W') 545 plt.ylabel('rho') 546 length = int(len(self.e_drho[e_name])) 547 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 548 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 549 if self.tau_exp[e_name] > 0: 550 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 551 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 552 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 553 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 554 else: 555 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 556 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 557 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 558 plt.xlim(-0.5, xmax) 559 plt.draw() 560 if save: 561 fig.savefig(save + "_" + str(e)) 562 563 def plot_rep_dist(self): 564 """Plot replica distribution for each ensemble with more than one replicum.""" 565 if not hasattr(self, 'e_dvalue'): 566 raise Exception('Run the gamma method first.') 567 for e, e_name in enumerate(self.mc_names): 568 if len(self.e_content[e_name]) == 1: 569 print('No replica distribution for a single replicum (', e_name, ')') 570 continue 571 r_length = [] 572 sub_r_mean = 0 573 for r, r_name in enumerate(self.e_content[e_name]): 574 r_length.append(len(self.deltas[r_name])) 575 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 576 e_N = np.sum(r_length) 577 sub_r_mean /= e_N 578 arr = np.zeros(len(self.e_content[e_name])) 579 for r, r_name in enumerate(self.e_content[e_name]): 580 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 581 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 582 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 583 plt.draw() 584 585 def plot_history(self, expand=True): 586 """Plot derived Monte Carlo history for each ensemble 587 588 Parameters 589 ---------- 590 expand : bool 591 show expanded history for irregular Monte Carlo chains (default: True). 592 """ 593 for e, e_name in enumerate(self.mc_names): 594 plt.figure() 595 r_length = [] 596 tmp = [] 597 tmp_expanded = [] 598 for r, r_name in enumerate(self.e_content[e_name]): 599 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 600 if expand: 601 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 602 r_length.append(len(tmp_expanded[-1])) 603 else: 604 r_length.append(len(tmp[-1])) 605 e_N = np.sum(r_length) 606 x = np.arange(e_N) 607 y_test = np.concatenate(tmp, axis=0) 608 if expand: 609 y = np.concatenate(tmp_expanded, axis=0) 610 else: 611 y = y_test 612 plt.errorbar(x, y, fmt='.', markersize=3) 613 plt.xlim(-0.5, e_N - 0.5) 614 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 615 plt.draw() 616 617 def plot_piechart(self, save=None): 618 """Plot piechart which shows the fractional contribution of each 619 ensemble to the error and returns a dictionary containing the fractions. 620 621 Parameters 622 ---------- 623 save : str 624 saves the figure to a file named 'save' if. 625 """ 626 if not hasattr(self, 'e_dvalue'): 627 raise Exception('Run the gamma method first.') 628 if np.isclose(0.0, self._dvalue, atol=1e-15): 629 raise Exception('Error is 0.0') 630 labels = self.e_names 631 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 632 fig1, ax1 = plt.subplots() 633 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 634 ax1.axis('equal') 635 plt.draw() 636 if save: 637 fig1.savefig(save) 638 639 return dict(zip(self.e_names, sizes)) 640 641 def dump(self, filename, datatype="json.gz", description="", **kwargs): 642 """Dump the Obs to a file 'name' of chosen format. 643 644 Parameters 645 ---------- 646 filename : str 647 name of the file to be saved. 648 datatype : str 649 Format of the exported file. Supported formats include 650 "json.gz" and "pickle" 651 description : str 652 Description for output file, only relevant for json.gz format. 653 path : str 654 specifies a custom path for the file (default '.') 655 """ 656 if 'path' in kwargs: 657 file_name = kwargs.get('path') + '/' + filename 658 else: 659 file_name = filename 660 661 if datatype == "json.gz": 662 from .input.json import dump_to_json 663 dump_to_json([self], file_name, description=description) 664 elif datatype == "pickle": 665 with open(file_name + '.p', 'wb') as fb: 666 pickle.dump(self, fb) 667 else: 668 raise Exception("Unknown datatype " + str(datatype)) 669 670 def export_jackknife(self): 671 """Export jackknife samples from the Obs 672 673 Returns 674 ------- 675 numpy.ndarray 676 Returns a numpy array of length N + 1 where N is the number of samples 677 for the given ensemble and replicum. The zeroth entry of the array contains 678 the mean value of the Obs, entries 1 to N contain the N jackknife samples 679 derived from the Obs. The current implementation only works for observables 680 defined on exactly one ensemble and replicum. The derived jackknife samples 681 should agree with samples from a full jackknife analysis up to O(1/N). 682 """ 683 684 if len(self.names) != 1: 685 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 686 687 name = self.names[0] 688 full_data = self.deltas[name] + self.r_values[name] 689 n = full_data.size 690 mean = self.value 691 tmp_jacks = np.zeros(n + 1) 692 tmp_jacks[0] = mean 693 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 694 return tmp_jacks 695 696 def __float__(self): 697 return float(self.value) 698 699 def __repr__(self): 700 return 'Obs[' + str(self) + ']' 701 702 def __str__(self): 703 if self._dvalue == 0.0: 704 return str(self.value) 705 return _format_uncertainty(self.value, self._dvalue) 706 707 def __hash__(self): 708 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 709 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 710 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 711 hash_tuple += tuple([o.encode() for o in self.names]) 712 m = hashlib.md5() 713 [m.update(o) for o in hash_tuple] 714 return int(m.hexdigest(), 16) & 0xFFFFFFFF 715 716 # Overload comparisons 717 def __lt__(self, other): 718 return self.value < other 719 720 def __le__(self, other): 721 return self.value <= other 722 723 def __gt__(self, other): 724 return self.value > other 725 726 def __ge__(self, other): 727 return self.value >= other 728 729 def __eq__(self, other): 730 return (self - other).is_zero() 731 732 def __ne__(self, other): 733 return not (self - other).is_zero() 734 735 # Overload math operations 736 def __add__(self, y): 737 if isinstance(y, Obs): 738 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 739 else: 740 if isinstance(y, np.ndarray): 741 return np.array([self + o for o in y]) 742 elif y.__class__.__name__ in ['Corr', 'CObs']: 743 return NotImplemented 744 else: 745 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 746 747 def __radd__(self, y): 748 return self + y 749 750 def __mul__(self, y): 751 if isinstance(y, Obs): 752 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 753 else: 754 if isinstance(y, np.ndarray): 755 return np.array([self * o for o in y]) 756 elif isinstance(y, complex): 757 return CObs(self * y.real, self * y.imag) 758 elif y.__class__.__name__ in ['Corr', 'CObs']: 759 return NotImplemented 760 else: 761 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 762 763 def __rmul__(self, y): 764 return self * y 765 766 def __sub__(self, y): 767 if isinstance(y, Obs): 768 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 769 else: 770 if isinstance(y, np.ndarray): 771 return np.array([self - o for o in y]) 772 elif y.__class__.__name__ in ['Corr', 'CObs']: 773 return NotImplemented 774 else: 775 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 776 777 def __rsub__(self, y): 778 return -1 * (self - y) 779 780 def __pos__(self): 781 return self 782 783 def __neg__(self): 784 return -1 * self 785 786 def __truediv__(self, y): 787 if isinstance(y, Obs): 788 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 789 else: 790 if isinstance(y, np.ndarray): 791 return np.array([self / o for o in y]) 792 elif y.__class__.__name__ in ['Corr', 'CObs']: 793 return NotImplemented 794 else: 795 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 796 797 def __rtruediv__(self, y): 798 if isinstance(y, Obs): 799 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 800 else: 801 if isinstance(y, np.ndarray): 802 return np.array([o / self for o in y]) 803 elif y.__class__.__name__ in ['Corr', 'CObs']: 804 return NotImplemented 805 else: 806 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 807 808 def __pow__(self, y): 809 if isinstance(y, Obs): 810 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 811 else: 812 return derived_observable(lambda x: x[0] ** y, [self]) 813 814 def __rpow__(self, y): 815 if isinstance(y, Obs): 816 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 817 else: 818 return derived_observable(lambda x: y ** x[0], [self]) 819 820 def __abs__(self): 821 return derived_observable(lambda x: anp.abs(x[0]), [self]) 822 823 # Overload numpy functions 824 def sqrt(self): 825 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 826 827 def log(self): 828 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 829 830 def exp(self): 831 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 832 833 def sin(self): 834 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 835 836 def cos(self): 837 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 838 839 def tan(self): 840 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 841 842 def arcsin(self): 843 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 844 845 def arccos(self): 846 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 847 848 def arctan(self): 849 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 850 851 def sinh(self): 852 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 853 854 def cosh(self): 855 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 856 857 def tanh(self): 858 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 859 860 def arcsinh(self): 861 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 862 863 def arccosh(self): 864 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 865 866 def arctanh(self): 867 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise Exception('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise Exception('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise Exception('names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise Exception('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.is_merged = {} 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise Exception("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise Exception('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 gaps = [] 272 for r_name in e_content[e_name]: 273 if isinstance(self.idl[r_name], range): 274 gaps.append(1) 275 else: 276 gaps.append(np.min(np.diff(self.idl[r_name]))) 277 278 if not np.all([gi == gaps[0] for gi in gaps]): 279 raise Exception(f"Replica for ensemble {e_name} are not equally spaced.", gaps) 280 else: 281 gapsize = gaps[0] 282 283 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 284 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 285 # Make sure no entry of tauint is smaller than 0.5 286 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 287 # hep-lat/0306017 eq. (42) 288 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) / gapsize + 0.5 - self.e_n_tauint[e_name]) / e_N) 289 self.e_n_dtauint[e_name][0] = 0.0 290 291 def _compute_drho(i): 292 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 293 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 294 295 _compute_drho(gapsize) 296 if self.tau_exp[e_name] > 0: 297 texp = self.tau_exp[e_name] 298 # Critical slowing down analysis 299 if w_max // 2 <= 1: 300 raise Exception("Need at least 8 samples for tau_exp error analysis") 301 for n in range(gapsize, w_max // 2, gapsize): 302 _compute_drho(n + gapsize) 303 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 304 # Bias correction hep-lat/0306017 eq. (49) included 305 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 306 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 307 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 308 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 310 self.e_windowsize[e_name] = n 311 break 312 else: 313 if self.S[e_name] == 0.0: 314 self.e_tauint[e_name] = 0.5 315 self.e_dtauint[e_name] = 0.0 316 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 318 self.e_windowsize[e_name] = 0 319 else: 320 # Standard automatic windowing procedure 321 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][gapsize::gapsize] + 1) / (2 * self.e_n_tauint[e_name][gapsize::gapsize] - 1)) 322 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 323 for n in range(1, w_max): 324 if n < w_max // 2 - 2: 325 _compute_drho(gapsize * n + gapsize) 326 if g_w[n - 1] < 0 or n >= w_max - 1: 327 n *= gapsize 328 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n / gapsize + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 329 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 330 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 331 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n / gapsize + 0.5) / e_N) 332 self.e_windowsize[e_name] = n 333 break 334 335 self._dvalue += self.e_dvalue[e_name] ** 2 336 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 337 338 for e_name in self.cov_names: 339 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 340 self.e_ddvalue[e_name] = 0 341 self._dvalue += self.e_dvalue[e_name]**2 342 343 self._dvalue = np.sqrt(self._dvalue) 344 if self._dvalue == 0.0: 345 self.ddvalue = 0.0 346 else: 347 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 348 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
383 def details(self, ens_content=True): 384 """Output detailed properties of the Obs. 385 386 Parameters 387 ---------- 388 ens_content : bool 389 print details about the ensembles and replica if true. 390 """ 391 if self.tag is not None: 392 print("Description:", self.tag) 393 if not hasattr(self, 'e_dvalue'): 394 print('Result\t %3.8e' % (self.value)) 395 else: 396 if self.value == 0.0: 397 percentage = np.nan 398 else: 399 percentage = np.abs(self._dvalue / self.value) * 100 400 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 401 if len(self.e_names) > 1: 402 print(' Ensemble errors:') 403 e_content = self.e_content 404 for e_name in self.mc_names: 405 if isinstance(self.idl[e_content[e_name][0]], range): 406 gap = self.idl[e_content[e_name][0]].step 407 else: 408 gap = np.min(np.diff(self.idl[e_content[e_name][0]])) 409 410 if len(self.e_names) > 1: 411 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 412 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 413 tau_string += f" in units of {gap} config" 414 if gap > 1: 415 tau_string += "s" 416 if self.tau_exp[e_name] > 0: 417 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 418 else: 419 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 420 print(tau_string) 421 for e_name in self.cov_names: 422 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 423 if ens_content is True: 424 if len(self.e_names) == 1: 425 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 426 else: 427 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 428 my_string_list = [] 429 for key, value in sorted(self.e_content.items()): 430 if key not in self.covobs: 431 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 432 if len(value) == 1: 433 my_string += f': {self.shape[value[0]]} configurations' 434 if isinstance(self.idl[value[0]], range): 435 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 436 else: 437 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 438 else: 439 sublist = [] 440 for v in value: 441 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 442 my_substring += f': {self.shape[v]} configurations' 443 if isinstance(self.idl[v], range): 444 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 445 else: 446 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 447 sublist.append(my_substring) 448 449 my_string += '\n' + '\n'.join(sublist) 450 else: 451 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 452 my_string_list.append(my_string) 453 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
455 def reweight(self, weight): 456 """Reweight the obs with given rewighting factors. 457 458 Parameters 459 ---------- 460 weight : Obs 461 Reweighting factor. An Observable that has to be defined on a superset of the 462 configurations in obs[i].idl for all i. 463 all_configs : bool 464 if True, the reweighted observables are normalized by the average of 465 the reweighting factor on all configurations in weight.idl and not 466 on the configurations in obs[i].idl. Default False. 467 """ 468 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
470 def is_zero_within_error(self, sigma=1): 471 """Checks whether the observable is zero within 'sigma' standard errors. 472 473 Parameters 474 ---------- 475 sigma : int 476 Number of standard errors used for the check. 477 478 Works only properly when the gamma method was run. 479 """ 480 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
482 def is_zero(self, atol=1e-10): 483 """Checks whether the observable is zero within a given tolerance. 484 485 Parameters 486 ---------- 487 atol : float 488 Absolute tolerance (for details see numpy documentation). 489 """ 490 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
492 def plot_tauint(self, save=None): 493 """Plot integrated autocorrelation time for each ensemble. 494 495 Parameters 496 ---------- 497 save : str 498 saves the figure to a file named 'save' if. 499 """ 500 if not hasattr(self, 'e_dvalue'): 501 raise Exception('Run the gamma method first.') 502 503 for e, e_name in enumerate(self.mc_names): 504 fig = plt.figure() 505 plt.xlabel(r'$W$') 506 plt.ylabel(r'$\tau_\mathrm{int}$') 507 length = int(len(self.e_n_tauint[e_name])) 508 if self.tau_exp[e_name] > 0: 509 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 510 x_help = np.arange(2 * self.tau_exp[e_name]) 511 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 512 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 513 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 514 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 515 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 516 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 517 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 518 else: 519 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 520 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 521 522 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 523 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 524 plt.legend() 525 plt.xlim(-0.5, xmax) 526 ylim = plt.ylim() 527 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 528 plt.draw() 529 if save: 530 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
532 def plot_rho(self, save=None): 533 """Plot normalized autocorrelation function time for each ensemble. 534 535 Parameters 536 ---------- 537 save : str 538 saves the figure to a file named 'save' if. 539 """ 540 if not hasattr(self, 'e_dvalue'): 541 raise Exception('Run the gamma method first.') 542 for e, e_name in enumerate(self.mc_names): 543 fig = plt.figure() 544 plt.xlabel('W') 545 plt.ylabel('rho') 546 length = int(len(self.e_drho[e_name])) 547 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 548 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 549 if self.tau_exp[e_name] > 0: 550 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 551 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 552 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 553 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 554 else: 555 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 556 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 557 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 558 plt.xlim(-0.5, xmax) 559 plt.draw() 560 if save: 561 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
563 def plot_rep_dist(self): 564 """Plot replica distribution for each ensemble with more than one replicum.""" 565 if not hasattr(self, 'e_dvalue'): 566 raise Exception('Run the gamma method first.') 567 for e, e_name in enumerate(self.mc_names): 568 if len(self.e_content[e_name]) == 1: 569 print('No replica distribution for a single replicum (', e_name, ')') 570 continue 571 r_length = [] 572 sub_r_mean = 0 573 for r, r_name in enumerate(self.e_content[e_name]): 574 r_length.append(len(self.deltas[r_name])) 575 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 576 e_N = np.sum(r_length) 577 sub_r_mean /= e_N 578 arr = np.zeros(len(self.e_content[e_name])) 579 for r, r_name in enumerate(self.e_content[e_name]): 580 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 581 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 582 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 583 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
585 def plot_history(self, expand=True): 586 """Plot derived Monte Carlo history for each ensemble 587 588 Parameters 589 ---------- 590 expand : bool 591 show expanded history for irregular Monte Carlo chains (default: True). 592 """ 593 for e, e_name in enumerate(self.mc_names): 594 plt.figure() 595 r_length = [] 596 tmp = [] 597 tmp_expanded = [] 598 for r, r_name in enumerate(self.e_content[e_name]): 599 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 600 if expand: 601 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 602 r_length.append(len(tmp_expanded[-1])) 603 else: 604 r_length.append(len(tmp[-1])) 605 e_N = np.sum(r_length) 606 x = np.arange(e_N) 607 y_test = np.concatenate(tmp, axis=0) 608 if expand: 609 y = np.concatenate(tmp_expanded, axis=0) 610 else: 611 y = y_test 612 plt.errorbar(x, y, fmt='.', markersize=3) 613 plt.xlim(-0.5, e_N - 0.5) 614 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 615 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
617 def plot_piechart(self, save=None): 618 """Plot piechart which shows the fractional contribution of each 619 ensemble to the error and returns a dictionary containing the fractions. 620 621 Parameters 622 ---------- 623 save : str 624 saves the figure to a file named 'save' if. 625 """ 626 if not hasattr(self, 'e_dvalue'): 627 raise Exception('Run the gamma method first.') 628 if np.isclose(0.0, self._dvalue, atol=1e-15): 629 raise Exception('Error is 0.0') 630 labels = self.e_names 631 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 632 fig1, ax1 = plt.subplots() 633 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 634 ax1.axis('equal') 635 plt.draw() 636 if save: 637 fig1.savefig(save) 638 639 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
641 def dump(self, filename, datatype="json.gz", description="", **kwargs): 642 """Dump the Obs to a file 'name' of chosen format. 643 644 Parameters 645 ---------- 646 filename : str 647 name of the file to be saved. 648 datatype : str 649 Format of the exported file. Supported formats include 650 "json.gz" and "pickle" 651 description : str 652 Description for output file, only relevant for json.gz format. 653 path : str 654 specifies a custom path for the file (default '.') 655 """ 656 if 'path' in kwargs: 657 file_name = kwargs.get('path') + '/' + filename 658 else: 659 file_name = filename 660 661 if datatype == "json.gz": 662 from .input.json import dump_to_json 663 dump_to_json([self], file_name, description=description) 664 elif datatype == "pickle": 665 with open(file_name + '.p', 'wb') as fb: 666 pickle.dump(self, fb) 667 else: 668 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
670 def export_jackknife(self): 671 """Export jackknife samples from the Obs 672 673 Returns 674 ------- 675 numpy.ndarray 676 Returns a numpy array of length N + 1 where N is the number of samples 677 for the given ensemble and replicum. The zeroth entry of the array contains 678 the mean value of the Obs, entries 1 to N contain the N jackknife samples 679 derived from the Obs. The current implementation only works for observables 680 defined on exactly one ensemble and replicum. The derived jackknife samples 681 should agree with samples from a full jackknife analysis up to O(1/N). 682 """ 683 684 if len(self.names) != 1: 685 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 686 687 name = self.names[0] 688 full_data = self.deltas[name] + self.r_values[name] 689 n = full_data.size 690 mean = self.value 691 tmp_jacks = np.zeros(n + 1) 692 tmp_jacks[0] = mean 693 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 694 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
870class CObs: 871 """Class for a complex valued observable.""" 872 __slots__ = ['_real', '_imag', 'tag'] 873 874 def __init__(self, real, imag=0.0): 875 self._real = real 876 self._imag = imag 877 self.tag = None 878 879 @property 880 def real(self): 881 return self._real 882 883 @property 884 def imag(self): 885 return self._imag 886 887 def gamma_method(self, **kwargs): 888 """Executes the gamma_method for the real and the imaginary part.""" 889 if isinstance(self.real, Obs): 890 self.real.gamma_method(**kwargs) 891 if isinstance(self.imag, Obs): 892 self.imag.gamma_method(**kwargs) 893 894 def is_zero(self): 895 """Checks whether both real and imaginary part are zero within machine precision.""" 896 return self.real == 0.0 and self.imag == 0.0 897 898 def conjugate(self): 899 return CObs(self.real, -self.imag) 900 901 def __add__(self, other): 902 if isinstance(other, np.ndarray): 903 return other + self 904 elif hasattr(other, 'real') and hasattr(other, 'imag'): 905 return CObs(self.real + other.real, 906 self.imag + other.imag) 907 else: 908 return CObs(self.real + other, self.imag) 909 910 def __radd__(self, y): 911 return self + y 912 913 def __sub__(self, other): 914 if isinstance(other, np.ndarray): 915 return -1 * (other - self) 916 elif hasattr(other, 'real') and hasattr(other, 'imag'): 917 return CObs(self.real - other.real, self.imag - other.imag) 918 else: 919 return CObs(self.real - other, self.imag) 920 921 def __rsub__(self, other): 922 return -1 * (self - other) 923 924 def __mul__(self, other): 925 if isinstance(other, np.ndarray): 926 return other * self 927 elif hasattr(other, 'real') and hasattr(other, 'imag'): 928 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 929 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 930 [self.real, other.real, self.imag, other.imag], 931 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 932 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 933 [self.real, other.real, self.imag, other.imag], 934 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 935 elif getattr(other, 'imag', 0) != 0: 936 return CObs(self.real * other.real - self.imag * other.imag, 937 self.imag * other.real + self.real * other.imag) 938 else: 939 return CObs(self.real * other.real, self.imag * other.real) 940 else: 941 return CObs(self.real * other, self.imag * other) 942 943 def __rmul__(self, other): 944 return self * other 945 946 def __truediv__(self, other): 947 if isinstance(other, np.ndarray): 948 return 1 / (other / self) 949 elif hasattr(other, 'real') and hasattr(other, 'imag'): 950 r = other.real ** 2 + other.imag ** 2 951 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 952 else: 953 return CObs(self.real / other, self.imag / other) 954 955 def __rtruediv__(self, other): 956 r = self.real ** 2 + self.imag ** 2 957 if hasattr(other, 'real') and hasattr(other, 'imag'): 958 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 959 else: 960 return CObs(self.real * other / r, -self.imag * other / r) 961 962 def __abs__(self): 963 return np.sqrt(self.real**2 + self.imag**2) 964 965 def __pos__(self): 966 return self 967 968 def __neg__(self): 969 return -1 * self 970 971 def __eq__(self, other): 972 return self.real == other.real and self.imag == other.imag 973 974 def __str__(self): 975 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 976 977 def __repr__(self): 978 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
887 def gamma_method(self, **kwargs): 888 """Executes the gamma_method for the real and the imaginary part.""" 889 if isinstance(self.real, Obs): 890 self.real.gamma_method(**kwargs) 891 if isinstance(self.imag, Obs): 892 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1158def derived_observable(func, data, array_mode=False, **kwargs): 1159 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1160 1161 Parameters 1162 ---------- 1163 func : object 1164 arbitrary function of the form func(data, **kwargs). For the 1165 automatic differentiation to work, all numpy functions have to have 1166 the autograd wrapper (use 'import autograd.numpy as anp'). 1167 data : list 1168 list of Obs, e.g. [obs1, obs2, obs3]. 1169 num_grad : bool 1170 if True, numerical derivatives are used instead of autograd 1171 (default False). To control the numerical differentiation the 1172 kwargs of numdifftools.step_generators.MaxStepGenerator 1173 can be used. 1174 man_grad : list 1175 manually supply a list or an array which contains the jacobian 1176 of func. Use cautiously, supplying the wrong derivative will 1177 not be intercepted. 1178 1179 Notes 1180 ----- 1181 For simple mathematical operations it can be practical to use anonymous 1182 functions. For the ratio of two observables one can e.g. use 1183 1184 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1185 """ 1186 1187 data = np.asarray(data) 1188 raveled_data = data.ravel() 1189 1190 # Workaround for matrix operations containing non Obs data 1191 if not all(isinstance(x, Obs) for x in raveled_data): 1192 for i in range(len(raveled_data)): 1193 if isinstance(raveled_data[i], (int, float)): 1194 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1195 1196 allcov = {} 1197 for o in raveled_data: 1198 for name in o.cov_names: 1199 if name in allcov: 1200 if not np.allclose(allcov[name], o.covobs[name].cov): 1201 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1202 else: 1203 allcov[name] = o.covobs[name].cov 1204 1205 n_obs = len(raveled_data) 1206 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1207 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1208 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1209 1210 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1211 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1212 1213 if data.ndim == 1: 1214 values = np.array([o.value for o in data]) 1215 else: 1216 values = np.vectorize(lambda x: x.value)(data) 1217 1218 new_values = func(values, **kwargs) 1219 1220 multi = int(isinstance(new_values, np.ndarray)) 1221 1222 new_r_values = {} 1223 new_idl_d = {} 1224 for name in new_sample_names: 1225 idl = [] 1226 tmp_values = np.zeros(n_obs) 1227 for i, item in enumerate(raveled_data): 1228 tmp_values[i] = item.r_values.get(name, item.value) 1229 tmp_idl = item.idl.get(name) 1230 if tmp_idl is not None: 1231 idl.append(tmp_idl) 1232 if multi > 0: 1233 tmp_values = np.array(tmp_values).reshape(data.shape) 1234 new_r_values[name] = func(tmp_values, **kwargs) 1235 new_idl_d[name] = _merge_idx(idl) 1236 if not is_merged[name]: 1237 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1238 1239 if 'man_grad' in kwargs: 1240 deriv = np.asarray(kwargs.get('man_grad')) 1241 if new_values.shape + data.shape != deriv.shape: 1242 raise Exception('Manual derivative does not have correct shape.') 1243 elif kwargs.get('num_grad') is True: 1244 if multi > 0: 1245 raise Exception('Multi mode currently not supported for numerical derivative') 1246 options = { 1247 'base_step': 0.1, 1248 'step_ratio': 2.5} 1249 for key in options.keys(): 1250 kwarg = kwargs.get(key) 1251 if kwarg is not None: 1252 options[key] = kwarg 1253 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1254 if tmp_df.size == 1: 1255 deriv = np.array([tmp_df.real]) 1256 else: 1257 deriv = tmp_df.real 1258 else: 1259 deriv = jacobian(func)(values, **kwargs) 1260 1261 final_result = np.zeros(new_values.shape, dtype=object) 1262 1263 if array_mode is True: 1264 1265 class _Zero_grad(): 1266 def __init__(self, N): 1267 self.grad = np.zeros((N, 1)) 1268 1269 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1270 d_extracted = {} 1271 g_extracted = {} 1272 for name in new_sample_names: 1273 d_extracted[name] = [] 1274 ens_length = len(new_idl_d[name]) 1275 for i_dat, dat in enumerate(data): 1276 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1277 for name in new_cov_names: 1278 g_extracted[name] = [] 1279 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1280 for i_dat, dat in enumerate(data): 1281 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1282 1283 for i_val, new_val in np.ndenumerate(new_values): 1284 new_deltas = {} 1285 new_grad = {} 1286 if array_mode is True: 1287 for name in new_sample_names: 1288 ens_length = d_extracted[name][0].shape[-1] 1289 new_deltas[name] = np.zeros(ens_length) 1290 for i_dat, dat in enumerate(d_extracted[name]): 1291 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1292 for name in new_cov_names: 1293 new_grad[name] = 0 1294 for i_dat, dat in enumerate(g_extracted[name]): 1295 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1296 else: 1297 for j_obs, obs in np.ndenumerate(data): 1298 for name in obs.names: 1299 if name in obs.cov_names: 1300 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1301 else: 1302 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1303 1304 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1305 1306 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1307 raise Exception('The same name has been used for deltas and covobs!') 1308 new_samples = [] 1309 new_means = [] 1310 new_idl = [] 1311 new_names_obs = [] 1312 for name in new_names: 1313 if name not in new_covobs: 1314 if is_merged[name]: 1315 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1316 else: 1317 filtered_deltas = new_deltas[name] 1318 filtered_idl_d = new_idl_d[name] 1319 1320 new_samples.append(filtered_deltas) 1321 new_idl.append(filtered_idl_d) 1322 new_means.append(new_r_values[name][i_val]) 1323 new_names_obs.append(name) 1324 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1325 for name in new_covobs: 1326 final_result[i_val].names.append(name) 1327 final_result[i_val]._covobs = new_covobs 1328 final_result[i_val]._value = new_val 1329 final_result[i_val].is_merged = is_merged 1330 final_result[i_val].reweighted = reweighted 1331 1332 if multi == 0: 1333 final_result = final_result.item() 1334 1335 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1375def reweight(weight, obs, **kwargs): 1376 """Reweight a list of observables. 1377 1378 Parameters 1379 ---------- 1380 weight : Obs 1381 Reweighting factor. An Observable that has to be defined on a superset of the 1382 configurations in obs[i].idl for all i. 1383 obs : list 1384 list of Obs, e.g. [obs1, obs2, obs3]. 1385 all_configs : bool 1386 if True, the reweighted observables are normalized by the average of 1387 the reweighting factor on all configurations in weight.idl and not 1388 on the configurations in obs[i].idl. Default False. 1389 """ 1390 result = [] 1391 for i in range(len(obs)): 1392 if len(obs[i].cov_names): 1393 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1394 if not set(obs[i].names).issubset(weight.names): 1395 raise Exception('Error: Ensembles do not fit') 1396 for name in obs[i].names: 1397 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1398 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1399 new_samples = [] 1400 w_deltas = {} 1401 for name in sorted(obs[i].names): 1402 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1403 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1404 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1405 1406 if kwargs.get('all_configs'): 1407 new_weight = weight 1408 else: 1409 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1410 1411 result.append(tmp_obs / new_weight) 1412 result[-1].reweighted = True 1413 result[-1].is_merged = obs[i].is_merged 1414 1415 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1418def correlate(obs_a, obs_b): 1419 """Correlate two observables. 1420 1421 Parameters 1422 ---------- 1423 obs_a : Obs 1424 First observable 1425 obs_b : Obs 1426 Second observable 1427 1428 Notes 1429 ----- 1430 Keep in mind to only correlate primary observables which have not been reweighted 1431 yet. The reweighting has to be applied after correlating the observables. 1432 Currently only works if ensembles are identical (this is not strictly necessary). 1433 """ 1434 1435 if sorted(obs_a.names) != sorted(obs_b.names): 1436 raise Exception('Ensembles do not fit') 1437 if len(obs_a.cov_names) or len(obs_b.cov_names): 1438 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1439 for name in obs_a.names: 1440 if obs_a.shape[name] != obs_b.shape[name]: 1441 raise Exception('Shapes of ensemble', name, 'do not fit') 1442 if obs_a.idl[name] != obs_b.idl[name]: 1443 raise Exception('idl of ensemble', name, 'do not fit') 1444 1445 if obs_a.reweighted is True: 1446 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1447 if obs_b.reweighted is True: 1448 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1449 1450 new_samples = [] 1451 new_idl = [] 1452 for name in sorted(obs_a.names): 1453 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1454 new_idl.append(obs_a.idl[name]) 1455 1456 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1457 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1458 o.reweighted = obs_a.reweighted or obs_b.reweighted 1459 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1462def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1463 r'''Calculates the error covariance matrix of a set of observables. 1464 1465 The gamma method has to be applied first to all observables. 1466 1467 Parameters 1468 ---------- 1469 obs : list or numpy.ndarray 1470 List or one dimensional array of Obs 1471 visualize : bool 1472 If True plots the corresponding normalized correlation matrix (default False). 1473 correlation : bool 1474 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1475 smooth : None or int 1476 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1477 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1478 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1479 small ones. 1480 1481 Notes 1482 ----- 1483 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1484 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1485 in the absence of autocorrelation. 1486 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1487 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1488 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1489 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1490 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1491 ''' 1492 1493 length = len(obs) 1494 1495 max_samples = np.max([o.N for o in obs]) 1496 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1497 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1498 1499 cov = np.zeros((length, length)) 1500 for i in range(length): 1501 for j in range(i, length): 1502 cov[i, j] = _covariance_element(obs[i], obs[j]) 1503 cov = cov + cov.T - np.diag(np.diag(cov)) 1504 1505 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1506 1507 if isinstance(smooth, int): 1508 corr = _smooth_eigenvalues(corr, smooth) 1509 1510 if visualize: 1511 plt.matshow(corr, vmin=-1, vmax=1) 1512 plt.set_cmap('RdBu') 1513 plt.colorbar() 1514 plt.draw() 1515 1516 if correlation is True: 1517 return corr 1518 1519 errors = [o.dvalue for o in obs] 1520 cov = np.diag(errors) @ corr @ np.diag(errors) 1521 1522 eigenvalues = np.linalg.eigh(cov)[0] 1523 if not np.all(eigenvalues >= 0): 1524 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1525 1526 return cov
Calculates the error covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1606def import_jackknife(jacks, name, idl=None): 1607 """Imports jackknife samples and returns an Obs 1608 1609 Parameters 1610 ---------- 1611 jacks : numpy.ndarray 1612 numpy array containing the mean value as zeroth entry and 1613 the N jackknife samples as first to Nth entry. 1614 name : str 1615 name of the ensemble the samples are defined on. 1616 """ 1617 length = len(jacks) - 1 1618 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1619 samples = jacks[1:] @ prj 1620 mean = np.mean(samples) 1621 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1622 new_obs._value = jacks[0] 1623 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1626def merge_obs(list_of_obs): 1627 """Combine all observables in list_of_obs into one new observable 1628 1629 Parameters 1630 ---------- 1631 list_of_obs : list 1632 list of the Obs object to be combined 1633 1634 Notes 1635 ----- 1636 It is not possible to combine obs which are based on the same replicum 1637 """ 1638 replist = [item for obs in list_of_obs for item in obs.names] 1639 if (len(replist) == len(set(replist))) is False: 1640 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1641 if any([len(o.cov_names) for o in list_of_obs]): 1642 raise Exception('Not possible to merge data that contains covobs!') 1643 new_dict = {} 1644 idl_dict = {} 1645 for o in list_of_obs: 1646 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1647 for key in set(o.deltas) | set(o.r_values)}) 1648 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1649 1650 names = sorted(new_dict.keys()) 1651 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1652 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1653 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1654 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1657def cov_Obs(means, cov, name, grad=None): 1658 """Create an Obs based on mean(s) and a covariance matrix 1659 1660 Parameters 1661 ---------- 1662 mean : list of floats or float 1663 N mean value(s) of the new Obs 1664 cov : list or array 1665 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1666 name : str 1667 identifier for the covariance matrix 1668 grad : list or array 1669 Gradient of the Covobs wrt. the means belonging to cov. 1670 """ 1671 1672 def covobs_to_obs(co): 1673 """Make an Obs out of a Covobs 1674 1675 Parameters 1676 ---------- 1677 co : Covobs 1678 Covobs to be embedded into the Obs 1679 """ 1680 o = Obs([], [], means=[]) 1681 o._value = co.value 1682 o.names.append(co.name) 1683 o._covobs[co.name] = co 1684 o._dvalue = np.sqrt(co.errsq()) 1685 return o 1686 1687 ol = [] 1688 if isinstance(means, (float, int)): 1689 means = [means] 1690 1691 for i in range(len(means)): 1692 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1693 if ol[0].covobs[name].N != len(means): 1694 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1695 if len(ol) == 1: 1696 return ol[0] 1697 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.