pyerrors.obs
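A minimal usage sketch (illustrative, not part of the module source; assumes pyerrors is installed and imported as pe):

    import numpy as np
    import pyerrors as pe

    samples = np.random.normal(1.0, 0.1, 500)   # toy Monte Carlo history
    my_obs = pe.Obs([samples], ['ensemble1'])   # one ensemble, default idl 1..N
    my_obs.gamma_method()                       # error analysis incl. autocorrelation
    print(my_obs)                               # e.g. 1.0023(47)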
import warnings
import hashlib
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
import scipy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}
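    # (added note) Ensemble-specific defaults can be registered in the class-level
    # dictionaries, e.g. Obs.S_dict['ensemble1'] = 3.0, and then take precedence
    # over the corresponding *_global value whenever gamma_method is called.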

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s]" % (name))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs
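    # (added note) Replica are grouped into ensembles via the '|' separator in
    # their names: 'A1|r01' and 'A1|r02' both belong to ensemble 'A1', so
    # e_names yields ['A1'] while e_content maps 'A1' to both replica.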

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise Exception(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
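            # (added note) e_n_tauint[W] = 0.5 + sum_{t=1}^{W} rho(t) is the
            # integrated autocorrelation time as a function of the summation
            # window W; its statistical error below follows hep-lat/0306017.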
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise Exception("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name]**2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return

    gm = gamma_method
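    # Usage sketch (illustrative): the error analysis is triggered explicitly,
    # e.g.
    #     my_obs.gamma_method()            # automatic windowing with S=2.0
    #     my_obs.gamma_method(S=3.0)       # more conservative window
    #     my_obs.gamma_method(tau_exp=5)   # attach an exponential tail
    # after which my_obs.dvalue and the per-ensemble my_obs.e_dvalue are set.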

    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
        idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs   '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Works only properly when the gamma method was run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if a string is given.
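
        Examples
        --------
        An illustrative sketch (assumes the gamma method has already been run):

        >>> my_obs.gamma_method()
        >>> my_obs.plot_tauint()  # one figure per ensemble in my_obs.mc_names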
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if a string is given.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot piechart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if a string is given.
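
        Examples
        --------
        An illustrative sketch (requires a prior gamma_method call):

        >>> my_obs.gamma_method()
        >>> fractions = my_obs.plot_piechart()  # dict of squared error fractions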
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(labels, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'name' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle"
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.')
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
        """Export bootstrap samples from the Obs

        Parameters
        ----------
        samples : int
            Number of bootstrap samples to generate.
        random_numbers : np.ndarray
            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
        save_rng : str
            Save the random numbers to a file if a path is specified.

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N bootstrap samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived bootstrap samples
            should agree with samples from a full bootstrap analysis up to O(1/N).
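
        Examples
        --------
        An illustrative sketch:

        >>> boots = my_obs.export_bootstrap(samples=1000)
        >>> boots.shape
        (1001,)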
        """
        if len(self.names) != 1:
            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        length = self.N

        if random_numbers is None:
            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
            rng = np.random.default_rng(seed)
            random_numbers = rng.integers(0, length, size=(samples, length))

        if save_rng is not None:
            np.savetxt(save_rng, random_numbers, fmt='%i')

        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
        ret = np.zeros(samples + 1)
        ret[0] = self.value
        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
        return ret

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        my_str = _format_uncertainty(self.value, self._dvalue,
                                     significance=significance)
        for char in ["+", " "]:
            if format_type.startswith(char):
                if my_str[0] != "-":
                    my_str = char + my_str
        return my_str

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        return (self - other).is_zero()

    def __ne__(self, other):
        return not (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
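                # (added note) NotImplemented makes Python try the reflected
                # operation on Corr/CObs, which implement arithmetic with Obs.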
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [self, y])
        else:
            return derived_observable(lambda x: x[0] ** y, [self])

    def __rpow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x: x[0] ** x[1], [y, self])
        else:
            return derived_observable(lambda x: y ** x[0], [self])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']
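    # Usage sketch (illustrative): c = CObs(obs_re, obs_im) wraps two Obs as real
    # and imaginary part; products such as c * c.conjugate() propagate the
    # fluctuations of both parts through derived_observable.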

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real**2 + self.imag**2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
            format_type = "2"
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"


def gamma_method(x, **kwargs):
    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.

    See docstring of pe.Obs.gamma_method for details.
    """
    return np.vectorize(lambda o: o.gm(**kwargs))(x)


def gm(x, **kwargs):
    """Short version of the vectorized gamma_method.

    See docstring of pe.Obs.gamma_method for details.
    """
    return gamma_method(x, **kwargs)


def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
    if dvalue == 0.0 or (not np.isfinite(dvalue)):
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
    configurations and where holes are filled by 0.
    If idx is of type range, the deltas are not changed if idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
    """
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    if _check_lists_equal(idl):
        return idl[0]

    idunion = sorted(set().union(*idl))

    # Check whether idunion can be expressed as range
    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
    idtest = [list(idrange), idunion]
    if _check_lists_equal(idtest):
        return idrange

    return idunion


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
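
    Examples
    --------
    An illustrative sketch:

    >>> _intersection_idx([range(1, 11), range(5, 15)])
    range(5, 11)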
    """

    if _check_lists_equal(idl):
        return idl[0]

    idinter = sorted(set.intersection(*[set(o) for o in idl]))

    # Check whether idinter can be expressed as range
    try:
        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
        idtest = [list(idrange), idinter]
        if _check_lists_equal(idtest):
            return idrange
    except IndexError:
        pass

    return idinter


def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
    """Expand deltas defined on idx to the list of configs that is defined by new_idx.
    New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest
    common divisor of the step sizes is used as new step size.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined.
        Has to be a subset of new_idx and has to be sorted in ascending order.
    shape : list
        Number of configs in idx.
    new_idx : list
        List of configs that defines the new range, has to be sorted in ascending order.
    """

    if type(idx) is range and type(new_idx) is range:
        if idx == new_idx:
            return deltas
    ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
    for i in range(shape):
        ret[idx[i] - new_idx[0]] = deltas[i]
    return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)
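    # (added note) array_mode=True pre-expands all deltas and covobs gradients
    # once per ensemble so that the element-wise loop below reduces to
    # tensordot calls; this trades memory for speed in large matrix operations.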
    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result


def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if not len(deltas) == len(idx_old):
        raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range:
        if idx_old == idx_new:
            return deltas
    if _check_lists_equal([idx_old, idx_new]):
        return deltas
    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
    if len(indices) < len(idx_new):
        raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old')
    return np.array(deltas)[indices]


def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
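
    Examples
    --------
    An illustrative sketch, assuming `w` is the reweighting factor Obs:

    >>> r_obs = reweight(w, [my_obs])[0]
    >>> r_obs.reweighted
    True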
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True

    return result


def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o


def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)
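    # (added note) The matrix is assembled element-wise and symmetrized below;
    # _covariance_element neglects autocorrelations, which are reinstated by
    # rescaling the resulting correlation matrix with the gamma_method errors.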

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T


def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
        gamma /= gamma_div

        dvalue += gamma

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()

    return dvalue
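
# (added sketch, not part of the original module) export_jackknife and
# import_jackknife invert each other up to O(1/N) effects, e.g.:
#
#     jacks = my_obs.export_jackknife()
#     rebuilt = import_jackknife(jacks, my_obs.names[0])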
1602 """Imports jackknife samples and returns an Obs 1603 1604 Parameters 1605 ---------- 1606 jacks : numpy.ndarray 1607 numpy array containing the mean value as zeroth entry and 1608 the N jackknife samples as first to Nth entry. 1609 name : str 1610 name of the ensemble the samples are defined on. 1611 """ 1612 length = len(jacks) - 1 1613 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1614 samples = jacks[1:] @ prj 1615 mean = np.mean(samples) 1616 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1617 new_obs._value = jacks[0] 1618 return new_obs 1619 1620 1621def import_bootstrap(boots, name, random_numbers): 1622 """Imports bootstrap samples and returns an Obs 1623 1624 Parameters 1625 ---------- 1626 boots : numpy.ndarray 1627 numpy array containing the mean value as zeroth entry and 1628 the N bootstrap samples as first to Nth entry. 1629 name : str 1630 name of the ensemble the samples are defined on. 1631 random_numbers : np.ndarray 1632 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, 1633 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo 1634 chain to be reconstructed. 1635 """ 1636 samples, length = random_numbers.shape 1637 if samples != len(boots) - 1: 1638 raise ValueError("Random numbers do not have the correct shape.") 1639 1640 if samples < length: 1641 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1642 1643 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1644 1645 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1646 ret = Obs([samples], [name]) 1647 ret._value = boots[0] 1648 return ret 1649 1650 1651def merge_obs(list_of_obs): 1652 """Combine all observables in list_of_obs into one new observable 1653 1654 Parameters 1655 ---------- 1656 list_of_obs : list 1657 list of the Obs object to be combined 1658 1659 Notes 1660 ----- 1661 It is not possible to combine obs which are based on the same replicum 1662 """ 1663 replist = [item for obs in list_of_obs for item in obs.names] 1664 if (len(replist) == len(set(replist))) is False: 1665 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1666 if any([len(o.cov_names) for o in list_of_obs]): 1667 raise Exception('Not possible to merge data that contains covobs!') 1668 new_dict = {} 1669 idl_dict = {} 1670 for o in list_of_obs: 1671 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1672 for key in set(o.deltas) | set(o.r_values)}) 1673 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1674 1675 names = sorted(new_dict.keys()) 1676 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1677 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1678 return o 1679 1680 1681def cov_Obs(means, cov, name, grad=None): 1682 """Create an Obs based on mean(s) and a covariance matrix 1683 1684 Parameters 1685 ---------- 1686 mean : list of floats or float 1687 N mean value(s) of the new Obs 1688 cov : list or array 1689 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1690 name : str 1691 identifier for the covariance matrix 1692 grad : list or array 1693 Gradient of the Covobs wrt. the means belonging to cov. 
1694 """ 1695 1696 def covobs_to_obs(co): 1697 """Make an Obs out of a Covobs 1698 1699 Parameters 1700 ---------- 1701 co : Covobs 1702 Covobs to be embedded into the Obs 1703 """ 1704 o = Obs([], [], means=[]) 1705 o._value = co.value 1706 o.names.append(co.name) 1707 o._covobs[co.name] = co 1708 o._dvalue = np.sqrt(co.errsq()) 1709 return o 1710 1711 ol = [] 1712 if isinstance(means, (float, int)): 1713 means = [means] 1714 1715 for i in range(len(means)): 1716 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1717 if ol[0].covobs[name].N != len(means): 1718 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1719 if len(ol) == 1: 1720 return ol[0] 1721 return ol 1722 1723 1724def _determine_gap(o, e_content, e_name): 1725 gaps = [] 1726 for r_name in e_content[e_name]: 1727 if isinstance(o.idl[r_name], range): 1728 gaps.append(o.idl[r_name].step) 1729 else: 1730 gaps.append(np.min(np.diff(o.idl[r_name]))) 1731 1732 gap = min(gaps) 1733 if not np.all([gi % gap == 0 for gi in gaps]): 1734 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) 1735 1736 return gap 1737 1738 1739def _check_lists_equal(idl): 1740 ''' 1741 Use groupby to efficiently check whether all elements of idl are identical. 1742 Returns True if all elements are equal, otherwise False. 1743 1744 Parameters 1745 ---------- 1746 idl : list of lists, ranges or np.ndarrays 1747 ''' 1748 g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl]) 1749 if next(g, True) and not next(g, False): 1750 return True 1751 return False
19class Obs: 20 """Class for a general observable. 21 22 Instances of Obs are the basic objects of a pyerrors error analysis. 23 They are initialized with a list which contains arrays of samples for 24 different ensembles/replica and another list of same length which contains 25 the names of the ensembles/replica. Mathematical operations can be 26 performed on instances. The result is another instance of Obs. The error of 27 an instance can be computed with the gamma_method. Also contains additional 28 methods for output and visualization of the error calculation. 29 30 Attributes 31 ---------- 32 S_global : float 33 Standard value for S (default 2.0) 34 S_dict : dict 35 Dictionary for S values. If an entry for a given ensemble 36 exists this overwrites the standard value for that ensemble. 37 tau_exp_global : float 38 Standard value for tau_exp (default 0.0) 39 tau_exp_dict : dict 40 Dictionary for tau_exp values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 N_sigma_global : float 43 Standard value for N_sigma (default 1.0) 44 N_sigma_dict : dict 45 Dictionary for N_sigma values. If an entry for a given ensemble exists 46 this overwrites the standard value for that ensemble. 47 """ 48 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 49 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 50 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 51 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 52 'idl', 'tag', '_covobs', '__dict__'] 53 54 S_global = 2.0 55 S_dict = {} 56 tau_exp_global = 0.0 57 tau_exp_dict = {} 58 N_sigma_global = 1.0 59 N_sigma_dict = {} 60 61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise ValueError('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise ValueError('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise ValueError('Names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 else: 87 if not isinstance(names[0], str): 88 raise TypeError('All names have to be strings.') 89 if min(len(x) for x in samples) <= 4: 90 raise ValueError('Samples have to have at least 5 entries.') 91 92 self.names = sorted(names) 93 self.shape = {} 94 self.r_values = {} 95 self.deltas = {} 96 self._covobs = {} 97 98 self._value = 0 99 self.N = 0 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise ValueError("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise TypeError('incompatible type for idl[%s].' 
% (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None 141 142 @property 143 def value(self): 144 return self._value 145 146 @property 147 def dvalue(self): 148 return self._dvalue 149 150 @property 151 def e_names(self): 152 return sorted(set([o.split('|')[0] for o in self.names])) 153 154 @property 155 def cov_names(self): 156 return sorted(set([o for o in self.covobs.keys()])) 157 158 @property 159 def mc_names(self): 160 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 161 162 @property 163 def e_content(self): 164 res = {} 165 for e, e_name in enumerate(self.e_names): 166 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 167 if e_name in self.names: 168 res[e_name].append(e_name) 169 return res 170 171 @property 172 def covobs(self): 173 return self._covobs 174 175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimate coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1).
191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 gapsize = _determine_gap(self, e_content, e_name) 242 243 r_length = [] 244 for r_name in e_content[e_name]: 245 if isinstance(self.idl[r_name], range): 246 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 247 else: 248 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 249 250 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 251 w_max = max(r_length) // 2 252 e_gamma[e_name] = np.zeros(w_max) 253 self.e_rho[e_name] = np.zeros(w_max) 254 self.e_drho[e_name] = np.zeros(w_max) 255 256 for r_name in e_content[e_name]: 257 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 258 259 gamma_div = np.zeros(w_max) 260 for r_name in e_content[e_name]: 261 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 262 gamma_div[gamma_div < 1] = 1.0 263 e_gamma[e_name] /= gamma_div[:w_max] 264 265 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 266 self.e_tauint[e_name] = 0.5 267 self.e_dtauint[e_name] = 0.0 268 self.e_dvalue[e_name] = 0.0 269 self.e_ddvalue[e_name] = 0.0 270 self.e_windowsize[e_name] = 0 271 continue 272 273 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 274 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 275 # Make sure no entry of tauint is smaller than 0.5 276 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 277 # hep-lat/0306017 eq. 
(42) 278 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 279 self.e_n_dtauint[e_name][0] = 0.0 280 281 def _compute_drho(i): 282 tmp = (self.e_rho[e_name][i + 1:w_max] 283 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 284 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 285 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 286 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 287 288 if self.tau_exp[e_name] > 0: 289 _compute_drho(1) 290 texp = self.tau_exp[e_name] 291 # Critical slowing down analysis 292 if w_max // 2 <= 1: 293 raise Exception("Need at least 8 samples for tau_exp error analysis") 294 for n in range(1, w_max // 2): 295 _compute_drho(n + 1) 296 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 297 # Bias correction hep-lat/0306017 eq. (49) included 298 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute value makes sure that the tail contribution is always positive 299 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 300 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 301 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 302 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 303 self.e_windowsize[e_name] = n 304 break 305 else: 306 if self.S[e_name] == 0.0: 307 self.e_tauint[e_name] = 0.5 308 self.e_dtauint[e_name] = 0.0 309 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 310 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 311 self.e_windowsize[e_name] = 0 312 else: 313 # Standard automatic windowing procedure 314 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 315 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 316 for n in range(1, w_max): 317 if g_w[n - 1] < 0 or n >= w_max - 1: 318 _compute_drho(n) 319 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 320 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 321 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 322 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 323 self.e_windowsize[e_name] = n 324 break 325 326 self._dvalue += self.e_dvalue[e_name] ** 2 327 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 328 329 for e_name in self.cov_names: 330 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 331 self.e_ddvalue[e_name] = 0 332 self._dvalue += self.e_dvalue[e_name]**2 333 334 self._dvalue = np.sqrt(self._dvalue) 335 if self._dvalue == 0.0: 336 self.ddvalue = 0.0 337 else: 338 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 339 return 340 341 gm = gamma_method 342 343 def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize): 344 """Calculate Gamma_{AA} from the deltas, which are defined on idx.
345 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 346 347 Parameters 348 ---------- 349 deltas : list 350 List of fluctuations 351 idx : list 352 List or range of configurations on which the deltas are defined. 353 shape : int 354 Number of configurations in idx. 355 w_max : int 356 Upper bound for the summation window. 357 fft : bool 358 determines whether the fft algorithm is used for the computation 359 of the autocorrelation function. 360 gapsize : int 361 The target distance between two configurations. If longer distances 362 are found in idx, the data is expanded. 363 """ 364 gamma = np.zeros(w_max) 365 deltas = _expand_deltas(deltas, idx, shape, gapsize) 366 new_shape = len(deltas) 367 if fft: 368 max_gamma = min(new_shape, w_max) 369 # The padding for the fft has to be even 370 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 371 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 372 else: 373 for n in range(w_max): 374 if new_shape - n >= 0: 375 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 376 377 return gamma 378 379 def details(self, ens_content=True): 380 """Output detailed properties of the Obs. 381 382 Parameters 383 ---------- 384 ens_content : bool 385 print details about the ensembles and replica if true. 386 """ 387 if self.tag is not None: 388 print("Description:", self.tag) 389 if not hasattr(self, 'e_dvalue'): 390 print('Result\t %3.8e' % (self.value)) 391 else: 392 if self.value == 0.0: 393 percentage = np.nan 394 else: 395 percentage = np.abs(self._dvalue / self.value) * 100 396 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 397 if len(self.e_names) > 1: 398 print(' Ensemble errors:') 399 e_content = self.e_content 400 for e_name in self.mc_names: 401 gap = _determine_gap(self, e_content, e_name) 402 403 if len(self.e_names) > 1: 404 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 405 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 406 tau_string += f" in units of {gap} config" 407 if gap > 1: 408 tau_string += "s" 409 if self.tau_exp[e_name] > 0: 410 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 411 else: 412 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 413 print(tau_string) 414 for e_name in self.cov_names: 415 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 416 if ens_content is True: 417 if len(self.e_names) == 1: 418 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 419 else: 420 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 421 my_string_list = [] 422 for key, value in sorted(self.e_content.items()): 423 if key not in self.covobs: 424 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 425 if len(value) == 1: 426 my_string += f': {self.shape[value[0]]} configurations' 427 if isinstance(self.idl[value[0]], range): 428 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 429 else: 430 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 431 else: 432 sublist = [] 433 for v in value: 434 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 435 my_substring += f': 
{self.shape[v]} configurations' 436 if isinstance(self.idl[v], range): 437 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 438 else: 439 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 440 sublist.append(my_substring) 441 442 my_string += '\n' + '\n'.join(sublist) 443 else: 444 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 445 my_string_list.append(my_string) 446 print('\n'.join(my_string_list)) 447 448 def reweight(self, weight): 449 """Reweight the obs with given reweighting factors. 450 451 Parameters 452 ---------- 453 weight : Obs 454 Reweighting factor. An Observable that has to be defined on a superset of the 455 configurations in obs[i].idl for all i. 456 all_configs : bool 457 if True, the reweighted observables are normalized by the average of 458 the reweighting factor on all configurations in weight.idl and not 459 on the configurations in obs[i].idl. Default False. 460 """ 461 return reweight(weight, [self])[0] 462 463 def is_zero_within_error(self, sigma=1): 464 """Checks whether the observable is zero within 'sigma' standard errors. 465 466 Parameters 467 ---------- 468 sigma : int 469 Number of standard errors used for the check. 470 471 Works properly only if the gamma method has been run. 472 """ 473 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 474 475 def is_zero(self, atol=1e-10): 476 """Checks whether the observable is zero within a given tolerance. 477 478 Parameters 479 ---------- 480 atol : float 481 Absolute tolerance (for details see numpy documentation). 482 """ 483 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 484 485 def plot_tauint(self, save=None): 486 """Plot integrated autocorrelation time for each ensemble. 487 488 Parameters 489 ---------- 490 save : str 491 saves the figure to a file named 'save' if given.
492 """ 493 if not hasattr(self, 'e_dvalue'): 494 raise Exception('Run the gamma method first.') 495 496 for e, e_name in enumerate(self.mc_names): 497 fig = plt.figure() 498 plt.xlabel(r'$W$') 499 plt.ylabel(r'$\tau_\mathrm{int}$') 500 length = int(len(self.e_n_tauint[e_name])) 501 if self.tau_exp[e_name] > 0: 502 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 503 x_help = np.arange(2 * self.tau_exp[e_name]) 504 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 505 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 506 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 507 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 508 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 509 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 510 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 511 else: 512 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 513 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 514 515 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 516 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 517 plt.legend() 518 plt.xlim(-0.5, xmax) 519 ylim = plt.ylim() 520 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 521 plt.draw() 522 if save: 523 fig.savefig(save + "_" + str(e)) 524 525 def plot_rho(self, save=None): 526 """Plot normalized autocorrelation function time for each ensemble. 527 528 Parameters 529 ---------- 530 save : str 531 saves the figure to a file named 'save' if. 
532 """ 533 if not hasattr(self, 'e_dvalue'): 534 raise Exception('Run the gamma method first.') 535 for e, e_name in enumerate(self.mc_names): 536 fig = plt.figure() 537 plt.xlabel('W') 538 plt.ylabel('rho') 539 length = int(len(self.e_drho[e_name])) 540 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 541 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 542 if self.tau_exp[e_name] > 0: 543 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 544 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 545 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 546 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 547 else: 548 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 549 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 550 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 551 plt.xlim(-0.5, xmax) 552 plt.draw() 553 if save: 554 fig.savefig(save + "_" + str(e)) 555 556 def plot_rep_dist(self): 557 """Plot replica distribution for each ensemble with more than one replicum.""" 558 if not hasattr(self, 'e_dvalue'): 559 raise Exception('Run the gamma method first.') 560 for e, e_name in enumerate(self.mc_names): 561 if len(self.e_content[e_name]) == 1: 562 print('No replica distribution for a single replicum (', e_name, ')') 563 continue 564 r_length = [] 565 sub_r_mean = 0 566 for r, r_name in enumerate(self.e_content[e_name]): 567 r_length.append(len(self.deltas[r_name])) 568 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 569 e_N = np.sum(r_length) 570 sub_r_mean /= e_N 571 arr = np.zeros(len(self.e_content[e_name])) 572 for r, r_name in enumerate(self.e_content[e_name]): 573 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 574 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 575 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 576 plt.draw() 577 578 def plot_history(self, expand=True): 579 """Plot derived Monte Carlo history for each ensemble 580 581 Parameters 582 ---------- 583 expand : bool 584 show expanded history for irregular Monte Carlo chains (default: True). 585 """ 586 for e, e_name in enumerate(self.mc_names): 587 plt.figure() 588 r_length = [] 589 tmp = [] 590 tmp_expanded = [] 591 for r, r_name in enumerate(self.e_content[e_name]): 592 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 593 if expand: 594 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 595 r_length.append(len(tmp_expanded[-1])) 596 else: 597 r_length.append(len(tmp[-1])) 598 e_N = np.sum(r_length) 599 x = np.arange(e_N) 600 y_test = np.concatenate(tmp, axis=0) 601 if expand: 602 y = np.concatenate(tmp_expanded, axis=0) 603 else: 604 y = y_test 605 plt.errorbar(x, y, fmt='.', markersize=3) 606 plt.xlim(-0.5, e_N - 0.5) 607 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 608 plt.draw() 609 610 def plot_piechart(self, save=None): 611 """Plot piechart which shows the fractional contribution of each 612 ensemble to the error and returns a dictionary containing the fractions. 613 614 Parameters 615 ---------- 616 save : str 617 saves the figure to a file named 'save' if. 
618 """ 619 if not hasattr(self, 'e_dvalue'): 620 raise Exception('Run the gamma method first.') 621 if np.isclose(0.0, self._dvalue, atol=1e-15): 622 raise Exception('Error is 0.0') 623 labels = self.e_names 624 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 625 fig1, ax1 = plt.subplots() 626 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 627 ax1.axis('equal') 628 plt.draw() 629 if save: 630 fig1.savefig(save) 631 632 return dict(zip(labels, sizes)) 633 634 def dump(self, filename, datatype="json.gz", description="", **kwargs): 635 """Dump the Obs to a file 'name' of chosen format. 636 637 Parameters 638 ---------- 639 filename : str 640 name of the file to be saved. 641 datatype : str 642 Format of the exported file. Supported formats include 643 "json.gz" and "pickle" 644 description : str 645 Description for output file, only relevant for json.gz format. 646 path : str 647 specifies a custom path for the file (default '.') 648 """ 649 if 'path' in kwargs: 650 file_name = kwargs.get('path') + '/' + filename 651 else: 652 file_name = filename 653 654 if datatype == "json.gz": 655 from .input.json import dump_to_json 656 dump_to_json([self], file_name, description=description) 657 elif datatype == "pickle": 658 with open(file_name + '.p', 'wb') as fb: 659 pickle.dump(self, fb) 660 else: 661 raise Exception("Unknown datatype " + str(datatype)) 662 663 def export_jackknife(self): 664 """Export jackknife samples from the Obs 665 666 Returns 667 ------- 668 numpy.ndarray 669 Returns a numpy array of length N + 1 where N is the number of samples 670 for the given ensemble and replicum. The zeroth entry of the array contains 671 the mean value of the Obs, entries 1 to N contain the N jackknife samples 672 derived from the Obs. The current implementation only works for observables 673 defined on exactly one ensemble and replicum. The derived jackknife samples 674 should agree with samples from a full jackknife analysis up to O(1/N). 675 """ 676 677 if len(self.names) != 1: 678 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 679 680 name = self.names[0] 681 full_data = self.deltas[name] + self.r_values[name] 682 n = full_data.size 683 mean = self.value 684 tmp_jacks = np.zeros(n + 1) 685 tmp_jacks[0] = mean 686 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 687 return tmp_jacks 688 689 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 690 """Export bootstrap samples from the Obs 691 692 Parameters 693 ---------- 694 samples : int 695 Number of bootstrap samples to generate. 696 random_numbers : np.ndarray 697 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 698 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 699 save_rng : str 700 Save the random numbers to a file if a path is specified. 701 702 Returns 703 ------- 704 numpy.ndarray 705 Returns a numpy array of length N + 1 where N is the number of samples 706 for the given ensemble and replicum. The zeroth entry of the array contains 707 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 708 derived from the Obs. The current implementation only works for observables 709 defined on exactly one ensemble and replicum. The derived bootstrap samples 710 should agree with samples from a full bootstrap analysis up to O(1/N). 
711 """ 712 if len(self.names) != 1: 713 raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 714 715 name = self.names[0] 716 length = self.N 717 718 if random_numbers is None: 719 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 720 rng = np.random.default_rng(seed) 721 random_numbers = rng.integers(0, length, size=(samples, length)) 722 723 if save_rng is not None: 724 np.savetxt(save_rng, random_numbers, fmt='%i') 725 726 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 727 ret = np.zeros(samples + 1) 728 ret[0] = self.value 729 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 730 return ret 731 732 def __float__(self): 733 return float(self.value) 734 735 def __repr__(self): 736 return 'Obs[' + str(self) + ']' 737 738 def __str__(self): 739 return _format_uncertainty(self.value, self._dvalue) 740 741 def __format__(self, format_type): 742 if format_type == "": 743 significance = 2 744 else: 745 significance = int(float(format_type.replace("+", "").replace("-", ""))) 746 my_str = _format_uncertainty(self.value, self._dvalue, 747 significance=significance) 748 for char in ["+", " "]: 749 if format_type.startswith(char): 750 if my_str[0] != "-": 751 my_str = char + my_str 752 return my_str 753 754 def __hash__(self): 755 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 756 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 757 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 758 hash_tuple += tuple([o.encode() for o in self.names]) 759 m = hashlib.md5() 760 [m.update(o) for o in hash_tuple] 761 return int(m.hexdigest(), 16) & 0xFFFFFFFF 762 763 # Overload comparisons 764 def __lt__(self, other): 765 return self.value < other 766 767 def __le__(self, other): 768 return self.value <= other 769 770 def __gt__(self, other): 771 return self.value > other 772 773 def __ge__(self, other): 774 return self.value >= other 775 776 def __eq__(self, other): 777 return (self - other).is_zero() 778 779 def __ne__(self, other): 780 return not (self - other).is_zero() 781 782 # Overload math operations 783 def __add__(self, y): 784 if isinstance(y, Obs): 785 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 786 else: 787 if isinstance(y, np.ndarray): 788 return np.array([self + o for o in y]) 789 elif y.__class__.__name__ in ['Corr', 'CObs']: 790 return NotImplemented 791 else: 792 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 793 794 def __radd__(self, y): 795 return self + y 796 797 def __mul__(self, y): 798 if isinstance(y, Obs): 799 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 800 else: 801 if isinstance(y, np.ndarray): 802 return np.array([self * o for o in y]) 803 elif isinstance(y, complex): 804 return CObs(self * y.real, self * y.imag) 805 elif y.__class__.__name__ in ['Corr', 'CObs']: 806 return NotImplemented 807 else: 808 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 809 810 def __rmul__(self, y): 811 return self * y 812 813 def __sub__(self, y): 814 if isinstance(y, Obs): 815 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 816 else: 817 if isinstance(y, np.ndarray): 818 return np.array([self - o for o in y]) 819 elif y.__class__.__name__ in ['Corr', 'CObs']: 820 
return NotImplemented 821 else: 822 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 823 824 def __rsub__(self, y): 825 return -1 * (self - y) 826 827 def __pos__(self): 828 return self 829 830 def __neg__(self): 831 return -1 * self 832 833 def __truediv__(self, y): 834 if isinstance(y, Obs): 835 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 836 else: 837 if isinstance(y, np.ndarray): 838 return np.array([self / o for o in y]) 839 elif y.__class__.__name__ in ['Corr', 'CObs']: 840 return NotImplemented 841 else: 842 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 843 844 def __rtruediv__(self, y): 845 if isinstance(y, Obs): 846 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 847 else: 848 if isinstance(y, np.ndarray): 849 return np.array([o / self for o in y]) 850 elif y.__class__.__name__ in ['Corr', 'CObs']: 851 return NotImplemented 852 else: 853 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 854 855 def __pow__(self, y): 856 if isinstance(y, Obs): 857 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 858 else: 859 return derived_observable(lambda x: x[0] ** y, [self]) 860 861 def __rpow__(self, y): 862 if isinstance(y, Obs): 863 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 864 else: 865 return derived_observable(lambda x: y ** x[0], [self]) 866 867 def __abs__(self): 868 return derived_observable(lambda x: anp.abs(x[0]), [self]) 869 870 # Overload numpy functions 871 def sqrt(self): 872 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 873 874 def log(self): 875 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 876 877 def exp(self): 878 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 879 880 def sin(self): 881 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 882 883 def cos(self): 884 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 885 886 def tan(self): 887 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 888 889 def arcsin(self): 890 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 891 892 def arccos(self): 893 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 894 895 def arctan(self): 896 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 897 898 def sinh(self): 899 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 900 901 def cosh(self): 902 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 903 904 def tanh(self): 905 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 906 907 def arcsinh(self): 908 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 909 910 def arccosh(self): 911 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 912 913 def arctanh(self): 914 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
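Because the arithmetic operators and the numpy functions are overloaded, derived quantities stay Obs instances and their errors can be estimated afterwards. A short sketch (the toy data and ensemble names are placeholders):

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(0)
x = Obs([rng.normal(2.0, 0.1, 1000)], ['ensA'])
y = Obs([rng.normal(1.0, 0.1, 1000)], ['ensB'])

z = np.log(x) / y + x ** 2  # every step returns a new Obs
z.gamma_method()            # error propagation via the gamma method
print(z)                    # value(error), see __str__ above
print(f"{z:+3}")            # three significant digits, explicit sign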
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
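These dictionaries can be used to set per-ensemble defaults before calling gamma_method; explicit keyword arguments still take precedence. A minimal sketch with the placeholder ensemble name 'ensA':

from pyerrors.obs import Obs

Obs.S_dict['ensA'] = 3.0        # overrides S_global (2.0) for 'ensA'
Obs.tau_exp_dict['ensA'] = 5.0  # enables the tail attachment for 'ensA'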
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
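A constructor sketch with two replica of one ensemble, following the 'ensemble|replicum' naming convention used by e_names and e_content (names and data are made up):

import numpy as np
from pyerrors.obs import Obs

rng = np.random.default_rng(1)
# The second replicum was only measured on every other configuration,
# which is encoded through idl.
obs = Obs([rng.normal(0.7, 0.2, 100), rng.normal(0.7, 0.2, 50)],
          ['ensA|r01', 'ensA|r02'],
          idl=[range(1, 101), range(1, 101, 2)])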
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
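Continuing with the obs constructed above, typical invocations look like this (gm is the short alias noted below):

obs.gamma_method()                       # automatic windowing with the default S=2.0
obs.gamma_method(S=3.0)                  # more conservative window
obs.gamma_method(tau_exp=10, N_sigma=2)  # attach an exponential tail to rho
print(obs.dvalue, obs.e_tauint['ensA'])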
175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 gapsize = _determine_gap(self, e_content, e_name) 242 243 r_length = [] 244 for r_name in e_content[e_name]: 245 if isinstance(self.idl[r_name], range): 246 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 247 else: 248 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 249 250 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 251 w_max = max(r_length) // 2 252 e_gamma[e_name] = np.zeros(w_max) 253 self.e_rho[e_name] = np.zeros(w_max) 254 self.e_drho[e_name] = np.zeros(w_max) 255 256 for r_name in e_content[e_name]: 257 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 258 259 gamma_div = np.zeros(w_max) 260 for r_name in e_content[e_name]: 261 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 262 gamma_div[gamma_div < 1] = 1.0 263 e_gamma[e_name] /= gamma_div[:w_max] 264 265 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 266 self.e_tauint[e_name] = 0.5 267 self.e_dtauint[e_name] = 0.0 268 self.e_dvalue[e_name] = 0.0 269 self.e_ddvalue[e_name] = 0.0 270 self.e_windowsize[e_name] = 0 271 continue 272 273 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 274 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 275 # Make sure no entry of tauint is smaller than 0.5 276 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + 
np.finfo(np.float64).eps 277 # hep-lat/0306017 eq. (42) 278 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 279 self.e_n_dtauint[e_name][0] = 0.0 280 281 def _compute_drho(i): 282 tmp = (self.e_rho[e_name][i + 1:w_max] 283 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 284 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 285 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 286 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 287 288 if self.tau_exp[e_name] > 0: 289 _compute_drho(1) 290 texp = self.tau_exp[e_name] 291 # Critical slowing down analysis 292 if w_max // 2 <= 1: 293 raise Exception("Need at least 8 samples for tau_exp error analysis") 294 for n in range(1, w_max // 2): 295 _compute_drho(n + 1) 296 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 297 # Bias correction hep-lat/0306017 eq. (49) included 298 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 299 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 300 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 301 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 302 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 303 self.e_windowsize[e_name] = n 304 break 305 else: 306 if self.S[e_name] == 0.0: 307 self.e_tauint[e_name] = 0.5 308 self.e_dtauint[e_name] = 0.0 309 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 310 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 311 self.e_windowsize[e_name] = 0 312 else: 313 # Standard automatic windowing procedure 314 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 315 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 316 for n in range(1, w_max): 317 if g_w[n - 1] < 0 or n >= w_max - 1: 318 _compute_drho(n) 319 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 320 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 321 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 322 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 323 self.e_windowsize[e_name] = n 324 break 325 326 self._dvalue += self.e_dvalue[e_name] ** 2 327 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 328 329 for e_name in self.cov_names: 330 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 331 self.e_ddvalue[e_name] = 0 332 self._dvalue += self.e_dvalue[e_name]**2 333 334 self._dvalue = np.sqrt(self._dvalue) 335 if self._dvalue == 0.0: 336 self.ddvalue = 0.0 337 else: 338 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 339 return
    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = ' ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = ' ' + "\u00B7 Covobs '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with given reweighting factors.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        all_configs : bool
            if True, the reweighted observables are normalized by the average of
            the reweighting factor on all configurations in weight.idl and not
            on the configurations in obs[i].idl. Default False.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Only works properly when the gamma method has been run.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())

    def plot_tauint(self, save=None):
        """Plot integrated autocorrelation time for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')

        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel(r'$W$')
            plt.ylabel(r'$\tau_\mathrm{int}$')
            length = int(len(self.e_n_tauint[e_name]))
            if self.tau_exp[e_name] > 0:
                base = self.e_n_tauint[e_name][self.e_windowsize[e_name]]
                x_help = np.arange(2 * self.tau_exp[e_name])
                y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base
                x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name])
                plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',')
                plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]],
                             yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor'])
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2))
            else:
                label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)

            plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label)
            plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--')
            plt.legend()
            plt.xlim(-0.5, xmax)
            ylim = plt.ylim()
            plt.ylim(bottom=0.0, top=max(1.0, ylim[1]))
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

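    # Visualization sketch (illustrative): once the gamma method has been run,
    # the windowing can be inspected; 'my_obs' is the hypothetical Obs from the
    # example above.
    #
    #     my_obs.plot_tauint()   # tau_int as a function of the window size W
    #     my_obs.plot_rho()      # normalized autocorrelation function
    #     my_obs.plot_history()  # Monte Carlo history with skew/kurtosis tests
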
    def plot_rho(self, save=None):
        """Plot normalized autocorrelation function for each ensemble.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            fig = plt.figure()
            plt.xlabel('W')
            plt.ylabel('rho')
            length = int(len(self.e_drho[e_name]))
            plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2)
            plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',')
            if self.tau_exp[e_name] > 0:
                plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]],
                         [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1)
                xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5
                plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2)))
            else:
                xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5)
                plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)))
            plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1)
            plt.xlim(-0.5, xmax)
            plt.draw()
            if save:
                fig.savefig(save + "_" + str(e))

    def plot_rep_dist(self):
        """Plot replica distribution for each ensemble with more than one replicum."""
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        for e, e_name in enumerate(self.mc_names):
            if len(self.e_content[e_name]) == 1:
                print('No replica distribution for a single replicum (', e_name, ')')
                continue
            r_length = []
            sub_r_mean = 0
            for r, r_name in enumerate(self.e_content[e_name]):
                r_length.append(len(self.deltas[r_name]))
                sub_r_mean += self.shape[r_name] * self.r_values[r_name]
            e_N = np.sum(r_length)
            sub_r_mean /= e_N
            arr = np.zeros(len(self.e_content[e_name]))
            for r, r_name in enumerate(self.e_content[e_name]):
                arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1))
            plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name]))
            plt.title('Replica distribution ' + e_name + ' (mean=0, var=1)')
            plt.draw()

    def plot_history(self, expand=True):
        """Plot derived Monte Carlo history for each ensemble.

        Parameters
        ----------
        expand : bool
            show expanded history for irregular Monte Carlo chains (default: True).
        """
        for e, e_name in enumerate(self.mc_names):
            plt.figure()
            r_length = []
            tmp = []
            tmp_expanded = []
            for r, r_name in enumerate(self.e_content[e_name]):
                tmp.append(self.deltas[r_name] + self.r_values[r_name])
                if expand:
                    tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name])
                    r_length.append(len(tmp_expanded[-1]))
                else:
                    r_length.append(len(tmp[-1]))
            e_N = np.sum(r_length)
            x = np.arange(e_N)
            y_test = np.concatenate(tmp, axis=0)
            if expand:
                y = np.concatenate(tmp_expanded, axis=0)
            else:
                y = y_test
            plt.errorbar(x, y, fmt='.', markersize=3)
            plt.xlim(-0.5, e_N - 0.5)
            plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})')
            plt.draw()

    def plot_piechart(self, save=None):
        """Plot piechart which shows the fractional contribution of each
        ensemble to the error and returns a dictionary containing the fractions.

        Parameters
        ----------
        save : str
            saves the figure to a file named 'save' if specified.
        """
        if not hasattr(self, 'e_dvalue'):
            raise Exception('Run the gamma method first.')
        if np.isclose(0.0, self._dvalue, atol=1e-15):
            raise Exception('Error is 0.0')
        labels = self.e_names
        sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2
        fig1, ax1 = plt.subplots()
        ax1.pie(sizes, labels=labels, startangle=90, normalize=True)
        ax1.axis('equal')
        plt.draw()
        if save:
            fig1.savefig(save)

        return dict(zip(labels, sizes))

    def dump(self, filename, datatype="json.gz", description="", **kwargs):
        """Dump the Obs to a file 'filename' of chosen format.

        Parameters
        ----------
        filename : str
            name of the file to be saved.
        datatype : str
            Format of the exported file. Supported formats include
            "json.gz" and "pickle".
        description : str
            Description for output file, only relevant for json.gz format.
        path : str
            specifies a custom path for the file (default '.').
        """
        if 'path' in kwargs:
            file_name = kwargs.get('path') + '/' + filename
        else:
            file_name = filename

        if datatype == "json.gz":
            from .input.json import dump_to_json
            dump_to_json([self], file_name, description=description)
        elif datatype == "pickle":
            with open(file_name + '.p', 'wb') as fb:
                pickle.dump(self, fb)
        else:
            raise Exception("Unknown datatype " + str(datatype))

    def export_jackknife(self):
        """Export jackknife samples from the Obs

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length N + 1 where N is the number of samples
            for the given ensemble and replicum. The zeroth entry of the array contains
            the mean value of the Obs, entries 1 to N contain the N jackknife samples
            derived from the Obs. The current implementation only works for observables
            defined on exactly one ensemble and replicum. The derived jackknife samples
            should agree with samples from a full jackknife analysis up to O(1/N).
        """

        if len(self.names) != 1:
            raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        full_data = self.deltas[name] + self.r_values[name]
        n = full_data.size
        mean = self.value
        tmp_jacks = np.zeros(n + 1)
        tmp_jacks[0] = mean
        tmp_jacks[1:] = (n * mean - full_data) / (n - 1)
        return tmp_jacks

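    # Jackknife sketch (illustrative): export the Obs as jackknife samples,
    # e.g. to interface with external analysis codes; 'my_obs' is hypothetical.
    #
    #     jacks = my_obs.export_jackknife()
    #     jacks[0]   # mean value of the Obs
    #     jacks[1:]  # the N jackknife samples
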
    def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None):
        """Export bootstrap samples from the Obs

        Parameters
        ----------
        samples : int
            Number of bootstrap samples to generate.
        random_numbers : np.ndarray
            Array of shape (samples, length) containing the random numbers to generate the bootstrap samples.
            If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
        save_rng : str
            Save the random numbers to a file if a path is specified.

        Returns
        -------
        numpy.ndarray
            Returns a numpy array of length samples + 1. The zeroth entry of the
            array contains the mean value of the Obs, the following entries contain
            the bootstrap samples derived from the Obs. The current implementation
            only works for observables defined on exactly one ensemble and replicum.
            The derived bootstrap samples should agree with samples from a full
            bootstrap analysis up to O(1/N).
        """
        if len(self.names) != 1:
            raise Exception("'export_bootstrap' is only implemented for Obs defined on one ensemble and replicum.")

        name = self.names[0]
        length = self.N

        if random_numbers is None:
            seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF
            rng = np.random.default_rng(seed)
            random_numbers = rng.integers(0, length, size=(samples, length))

        if save_rng is not None:
            np.savetxt(save_rng, random_numbers, fmt='%i')

        proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length
        ret = np.zeros(samples + 1)
        ret[0] = self.value
        ret[1:] = proj @ (self.deltas[name] + self.r_values[name])
        return ret

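    # Bootstrap sketch (illustrative): either let the method derive the random
    # numbers reproducibly from the ensemble name, or pass them explicitly to
    # control the resampling (required later for import_bootstrap).
    #
    #     boots = my_obs.export_bootstrap(samples=500)
    #     rn = np.random.default_rng(7).integers(0, my_obs.N, size=(1500, my_obs.N))
    #     boots = my_obs.export_bootstrap(samples=1500, random_numbers=rn)
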
class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real ** 2 + self.imag ** 2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
            format_type = "2"
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"

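# Usage sketch (illustrative): a complex observable is built from two Obs;
# 'obs_re' and 'obs_im' stand for any previously constructed Obs.
#
#     c_obs = CObs(obs_re, obs_im)
#     c_abs2 = c_obs * c_obs.conjugate()  # |c_obs|^2 with full error propagation
#     c_abs2.gamma_method()
#     print(c_abs2.real, c_abs2.imag)
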
def gamma_method(x, **kwargs):
    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.

    See docstring of pe.Obs.gamma_method for details.
    """
    return np.vectorize(lambda o: o.gm(**kwargs))(x)

def gm(x, **kwargs):
    """Short version of the vectorized gamma_method.

    See docstring of pe.Obs.gamma_method for details.
    """
    return gamma_method(x, **kwargs)

def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise Exception('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name])

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise Exception('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result

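# Derivation sketch (illustrative): error propagation through arbitrary
# functions. Non-trivial numpy calls have to use the autograd wrapper anp;
# 'obs1' and 'obs2' stand for any existing Obs.
#
#     ratio = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
#     log_obs = derived_observable(lambda x: anp.log(x[0]), [obs1])
#     log_obs.gamma_method()
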
def reweight(weight, obs, **kwargs):
    """Reweight a list of observables.

    Parameters
    ----------
    weight : Obs
        Reweighting factor. An Observable that has to be defined on a superset of the
        configurations in obs[i].idl for all i.
    obs : list
        list of Obs, e.g. [obs1, obs2, obs3].
    all_configs : bool
        if True, the reweighted observables are normalized by the average of
        the reweighting factor on all configurations in weight.idl and not
        on the configurations in obs[i].idl. Default False.
    """
    result = []
    for i in range(len(obs)):
        if len(obs[i].cov_names):
            raise Exception('Error: Not possible to reweight an Obs that contains covobs!')
        if not set(obs[i].names).issubset(weight.names):
            raise Exception('Error: Ensembles do not fit')
        for name in obs[i].names:
            if not set(obs[i].idl[name]).issubset(weight.idl[name]):
                raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name))
        new_samples = []
        w_deltas = {}
        for name in sorted(obs[i].names):
            w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name])
            new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name]))
        tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        if kwargs.get('all_configs'):
            new_weight = weight
        else:
            new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)])

        result.append(tmp_obs / new_weight)
        result[-1].reweighted = True

    return result

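# Reweighting sketch (illustrative): 'w' is an Obs holding the reweighting
# factor, defined on a superset of the configurations of 'obs1' and 'obs2'.
#
#     obs1_rw, obs2_rw = reweight(w, [obs1, obs2])
#     obs1_rw.reweighted  # True
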
def correlate(obs_a, obs_b):
    """Correlate two observables.

    Parameters
    ----------
    obs_a : Obs
        First observable
    obs_b : Obs
        Second observable

    Notes
    -----
    Keep in mind to only correlate primary observables which have not been reweighted
    yet. The reweighting has to be applied after correlating the observables.
    Currently only works if ensembles are identical (this is not strictly necessary).
    """

    if sorted(obs_a.names) != sorted(obs_b.names):
        raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}")
    if len(obs_a.cov_names) or len(obs_b.cov_names):
        raise Exception('Error: Not possible to correlate Obs that contain covobs!')
    for name in obs_a.names:
        if obs_a.shape[name] != obs_b.shape[name]:
            raise Exception('Shapes of ensemble', name, 'do not fit')
        if obs_a.idl[name] != obs_b.idl[name]:
            raise Exception('idl of ensemble', name, 'do not fit')

    if obs_a.reweighted is True:
        warnings.warn("The first observable is already reweighted.", RuntimeWarning)
    if obs_b.reweighted is True:
        warnings.warn("The second observable is already reweighted.", RuntimeWarning)

    new_samples = []
    new_idl = []
    for name in sorted(obs_a.names):
        new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name]))
        new_idl.append(obs_a.idl[name])

    o = Obs(new_samples, sorted(obs_a.names), idl=new_idl)
    o.reweighted = obs_a.reweighted or obs_b.reweighted
    return o

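# Correlation sketch (illustrative): element-wise product of two primary
# observables defined on identical configurations; any reweighting has to be
# applied afterwards.
#
#     ab = correlate(obs_a, obs_b)
#     ab.gamma_method()
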
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then
    rescaling the correlation matrix by the full errors including the previous gamma method estimate for the
    autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated
    autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation
    times of the corresponding diagonal elements.
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov

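# Covariance sketch (illustrative): the gamma method has to be applied to all
# observables first; 'obs1' and 'obs2' stand for any existing Obs.
#
#     for o in [obs1, obs2]:
#         o.gamma_method()
#     cov = covariance([obs1, obs2])                     # 2x2 error covariance
#     corr = covariance([obs1, obs2], correlation=True)  # correlation matrix
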
def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list of ranges or lists on which the samples are defined.
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs

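# Round-trip sketch (illustrative): export_jackknife and import_jackknife are
# inverse operations for an Obs defined on a single replicum; 'my_obs' and
# the ensemble name 'ens_A' are hypothetical.
#
#     jacks = my_obs.export_jackknife()
#     reconstructed = import_jackknife(jacks, 'ens_A')
#     reconstructed.gamma_method()
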
def import_bootstrap(boots, name, random_numbers):
    """Imports bootstrap samples and returns an Obs

    Parameters
    ----------
    boots : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N bootstrap samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    random_numbers : np.ndarray
        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
        chain to be reconstructed.
    """
    samples, length = random_numbers.shape
    if samples != len(boots) - 1:
        raise ValueError("Random numbers do not have the correct shape.")

    if samples < length:
        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")

    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length

    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
    ret = Obs([samples], [name])
    ret._value = boots[0]
    return ret

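# Round-trip sketch (illustrative): reconstructing an Obs from bootstrap
# samples requires the exact random numbers used for the export and at least
# as many bootstrap samples as original Monte Carlo data points.
#
#     rn = np.random.default_rng(7).integers(0, my_obs.N, size=(1500, my_obs.N))
#     boots = my_obs.export_bootstrap(samples=1500, random_numbers=rn)
#     reconstructed = import_bootstrap(boots, 'ens_A', random_numbers=rn)
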
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable.

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if (len(replist) == len(set(replist))) is False:
        raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise Exception('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o

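# Merging sketch (illustrative): combine two replica of the same ensemble into
# one Obs; the 'ensemble|replicum' naming convention is assumed as in e_content,
# and 'samples_1'/'samples_2' stand for arrays of measurements.
#
#     obs_r1 = Obs([samples_1], ['ens_A|r01'])
#     obs_r2 = Obs([samples_2], ['ens_A|r02'])
#     merged = merge_obs([obs_r1, obs_r2])
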
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix.

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise Exception('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol

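# Covobs sketch (illustrative): promote external inputs with known
# (co)variance, e.g. a renormalization factor, to Obs that propagate errors
# like any other observable. All numbers and names are made up.
#
#     zfac = cov_Obs(1.23, 0.04 ** 2, 'Z_factor')  # single value, 0d covariance
#     a, b = cov_Obs([0.7, 1.1], [[1e-4, 2e-5], [2e-5, 4e-4]], 'LEC_input')
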