pyerrors.obs
1import warnings 2import hashlib 3import pickle 4import numpy as np 5import autograd.numpy as anp # Thinly-wrapped numpy 6import scipy 7from autograd import jacobian 8import matplotlib.pyplot as plt 9from scipy.stats import skew, skewtest, kurtosis, kurtosistest 10import numdifftools as nd 11from itertools import groupby 12from .covobs import Covobs 13 14# Improve print output of numpy.ndarrays containing Obs objects. 15np.set_printoptions(formatter={'object': lambda x: str(x)}) 16 17 18class Obs: 19 """Class for a general observable. 20 21 Instances of Obs are the basic objects of a pyerrors error analysis. 22 They are initialized with a list which contains arrays of samples for 23 different ensembles/replica and another list of same length which contains 24 the names of the ensembles/replica. Mathematical operations can be 25 performed on instances. The result is another instance of Obs. The error of 26 an instance can be computed with the gamma_method. Also contains additional 27 methods for output and visualization of the error calculation. 28 29 Attributes 30 ---------- 31 S_global : float 32 Standard value for S (default 2.0) 33 S_dict : dict 34 Dictionary for S values. If an entry for a given ensemble 35 exists this overwrites the standard value for that ensemble. 36 tau_exp_global : float 37 Standard value for tau_exp (default 0.0) 38 tau_exp_dict : dict 39 Dictionary for tau_exp values. If an entry for a given ensemble exists 40 this overwrites the standard value for that ensemble. 41 N_sigma_global : float 42 Standard value for N_sigma (default 1.0) 43 N_sigma_dict : dict 44 Dictionary for N_sigma values. If an entry for a given ensemble exists 45 this overwrites the standard value for that ensemble. 46 """ 47 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 48 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 49 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 50 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 51 'idl', 'tag', '_covobs', '__dict__'] 52 53 S_global = 2.0 54 S_dict = {} 55 tau_exp_global = 0.0 56 tau_exp_dict = {} 57 N_sigma_global = 1.0 58 N_sigma_dict = {} 59 60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise ValueError('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise ValueError('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise ValueError('Names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise ValueError('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]]))) 108 elif np.any(dc == 0): 109 raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]]))) 110 if len(dc) == 1: 111 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 112 else: 113 self.idl[name] = list(idx) 114 else: 115 raise TypeError('incompatible type for idl[%s].' % (name)) 116 else: 117 for name, sample in sorted(zip(names, samples)): 118 self.idl[name] = range(1, len(sample) + 1) 119 120 if kwargs.get("means") is not None: 121 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 122 self.shape[name] = len(self.idl[name]) 123 self.N += self.shape[name] 124 self.r_values[name] = mean 125 self.deltas[name] = sample 126 else: 127 for name, sample in sorted(zip(names, samples)): 128 self.shape[name] = len(self.idl[name]) 129 self.N += self.shape[name] 130 if len(sample) != self.shape[name]: 131 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 132 self.r_values[name] = np.mean(sample) 133 self.deltas[name] = sample - self.r_values[name] 134 self._value += self.shape[name] * self.r_values[name] 135 self._value /= self.N 136 137 self._dvalue = 0.0 138 self.ddvalue = 0.0 139 self.reweighted = False 140 141 self.tag = None 142 143 @property 144 def value(self): 145 return self._value 146 147 @property 148 def dvalue(self): 149 return self._dvalue 150 151 @property 152 def e_names(self): 153 return sorted(set([o.split('|')[0] for o in self.names])) 154 155 @property 156 def cov_names(self): 157 return sorted(set([o for o in self.covobs.keys()])) 158 159 @property 160 def mc_names(self): 161 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 162 163 @property 164 def e_content(self): 165 res = {} 166 for e, e_name in enumerate(self.e_names): 167 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 168 if e_name in self.names: 169 res[e_name].append(e_name) 170 return res 171 172 @property 173 def covobs(self): 174 return self._covobs 175 176 def gamma_method(self, **kwargs): 177 """Estimate the error and related properties of the Obs. 178 179 Parameters 180 ---------- 181 S : float 182 specifies a custom value for the parameter S (default 2.0). 183 If set to 0 it is assumed that the data exhibits no 184 autocorrelation. In this case the error estimates coincides 185 with the sample standard error. 186 tau_exp : float 187 positive value triggers the critical slowing down analysis 188 (default 0.0). 189 N_sigma : float 190 number of standard deviations from zero until the tail is 191 attached to the autocorrelation function (default 1). 192 fft : bool 193 determines whether the fft algorithm is used for the computation 194 of the autocorrelation function (default True) 195 """ 196 197 e_content = self.e_content 198 self.e_dvalue = {} 199 self.e_ddvalue = {} 200 self.e_tauint = {} 201 self.e_dtauint = {} 202 self.e_windowsize = {} 203 self.e_n_tauint = {} 204 self.e_n_dtauint = {} 205 e_gamma = {} 206 self.e_rho = {} 207 self.e_drho = {} 208 self._dvalue = 0 209 self.ddvalue = 0 210 211 self.S = {} 212 self.tau_exp = {} 213 self.N_sigma = {} 214 215 if kwargs.get('fft') is False: 216 fft = False 217 else: 218 fft = True 219 220 def _parse_kwarg(kwarg_name): 221 if kwarg_name in kwargs: 222 tmp = kwargs.get(kwarg_name) 223 if isinstance(tmp, (int, float)): 224 if tmp < 0: 225 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 226 for e, e_name in enumerate(self.e_names): 227 getattr(self, kwarg_name)[e_name] = tmp 228 else: 229 raise TypeError(kwarg_name + ' is not in proper format.') 230 else: 231 for e, e_name in enumerate(self.e_names): 232 if e_name in getattr(Obs, kwarg_name + '_dict'): 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 234 else: 235 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 236 237 _parse_kwarg('S') 238 _parse_kwarg('tau_exp') 239 _parse_kwarg('N_sigma') 240 241 for e, e_name in enumerate(self.mc_names): 242 gapsize = _determine_gap(self, e_content, e_name) 243 244 r_length = [] 245 for r_name in e_content[e_name]: 246 if isinstance(self.idl[r_name], range): 247 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 248 else: 249 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 250 251 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 252 w_max = max(r_length) // 2 253 e_gamma[e_name] = np.zeros(w_max) 254 self.e_rho[e_name] = np.zeros(w_max) 255 self.e_drho[e_name] = np.zeros(w_max) 256 257 for r_name in e_content[e_name]: 258 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 259 260 gamma_div = np.zeros(w_max) 261 for r_name in e_content[e_name]: 262 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 263 gamma_div[gamma_div < 1] = 1.0 264 e_gamma[e_name] /= gamma_div[:w_max] 265 266 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 267 self.e_tauint[e_name] = 0.5 268 self.e_dtauint[e_name] = 0.0 269 self.e_dvalue[e_name] = 0.0 270 self.e_ddvalue[e_name] = 0.0 271 self.e_windowsize[e_name] = 0 272 continue 273 274 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 275 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 276 # Make sure no entry of tauint is smaller than 0.5 277 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 278 # hep-lat/0306017 eq. (42) 279 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 280 self.e_n_dtauint[e_name][0] = 0.0 281 282 def _compute_drho(i): 283 tmp = (self.e_rho[e_name][i + 1:w_max] 284 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 285 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 286 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 287 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 288 289 if self.tau_exp[e_name] > 0: 290 _compute_drho(1) 291 texp = self.tau_exp[e_name] 292 # Critical slowing down analysis 293 if w_max // 2 <= 1: 294 raise Exception("Need at least 8 samples for tau_exp error analysis") 295 for n in range(1, w_max // 2): 296 _compute_drho(n + 1) 297 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 298 # Bias correction hep-lat/0306017 eq. (49) included 299 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 300 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 301 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 302 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 303 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 304 self.e_windowsize[e_name] = n 305 break 306 else: 307 if self.S[e_name] == 0.0: 308 self.e_tauint[e_name] = 0.5 309 self.e_dtauint[e_name] = 0.0 310 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 311 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 312 self.e_windowsize[e_name] = 0 313 else: 314 # Standard automatic windowing procedure 315 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 316 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 317 for n in range(1, w_max): 318 if g_w[n - 1] < 0 or n >= w_max - 1: 319 _compute_drho(n) 320 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 321 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 322 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 323 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 324 self.e_windowsize[e_name] = n 325 break 326 327 self._dvalue += self.e_dvalue[e_name] ** 2 328 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 329 330 for e_name in self.cov_names: 331 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 332 self.e_ddvalue[e_name] = 0 333 self._dvalue += self.e_dvalue[e_name]**2 334 335 self._dvalue = np.sqrt(self._dvalue) 336 if self._dvalue == 0.0: 337 self.ddvalue = 0.0 338 else: 339 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 340 return 341 342 gm = gamma_method 343 344 def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize): 345 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 346 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 347 348 Parameters 349 ---------- 350 deltas : list 351 List of fluctuations 352 idx : list 353 List or range of configurations on which the deltas are defined. 354 shape : int 355 Number of configurations in idx. 356 w_max : int 357 Upper bound for the summation window. 358 fft : bool 359 determines whether the fft algorithm is used for the computation 360 of the autocorrelation function. 361 gapsize : int 362 The target distance between two configurations. If longer distances 363 are found in idx, the data is expanded. 364 """ 365 gamma = np.zeros(w_max) 366 deltas = _expand_deltas(deltas, idx, shape, gapsize) 367 new_shape = len(deltas) 368 if fft: 369 max_gamma = min(new_shape, w_max) 370 # The padding for the fft has to be even 371 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 372 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 373 else: 374 for n in range(w_max): 375 if new_shape - n >= 0: 376 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 377 378 return gamma 379 380 def details(self, ens_content=True): 381 """Output detailed properties of the Obs. 382 383 Parameters 384 ---------- 385 ens_content : bool 386 print details about the ensembles and replica if true. 387 """ 388 if self.tag is not None: 389 print("Description:", self.tag) 390 if not hasattr(self, 'e_dvalue'): 391 print('Result\t %3.8e' % (self.value)) 392 else: 393 if self.value == 0.0: 394 percentage = np.nan 395 else: 396 percentage = np.abs(self._dvalue / self.value) * 100 397 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 398 if len(self.e_names) > 1: 399 print(' Ensemble errors:') 400 e_content = self.e_content 401 for e_name in self.mc_names: 402 gap = _determine_gap(self, e_content, e_name) 403 404 if len(self.e_names) > 1: 405 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 406 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 407 tau_string += f" in units of {gap} config" 408 if gap > 1: 409 tau_string += "s" 410 if self.tau_exp[e_name] > 0: 411 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 412 else: 413 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 414 print(tau_string) 415 for e_name in self.cov_names: 416 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 417 if ens_content is True: 418 if len(self.e_names) == 1: 419 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 420 else: 421 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 422 my_string_list = [] 423 for key, value in sorted(self.e_content.items()): 424 if key not in self.covobs: 425 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 426 if len(value) == 1: 427 my_string += f': {self.shape[value[0]]} configurations' 428 if isinstance(self.idl[value[0]], range): 429 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 430 else: 431 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 432 else: 433 sublist = [] 434 for v in value: 435 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 436 my_substring += f': {self.shape[v]} configurations' 437 if isinstance(self.idl[v], range): 438 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 439 else: 440 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 441 sublist.append(my_substring) 442 443 my_string += '\n' + '\n'.join(sublist) 444 else: 445 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 446 my_string_list.append(my_string) 447 print('\n'.join(my_string_list)) 448 449 def reweight(self, weight): 450 """Reweight the obs with given rewighting factors. 451 452 Parameters 453 ---------- 454 weight : Obs 455 Reweighting factor. An Observable that has to be defined on a superset of the 456 configurations in obs[i].idl for all i. 457 all_configs : bool 458 if True, the reweighted observables are normalized by the average of 459 the reweighting factor on all configurations in weight.idl and not 460 on the configurations in obs[i].idl. Default False. 461 """ 462 return reweight(weight, [self])[0] 463 464 def is_zero_within_error(self, sigma=1): 465 """Checks whether the observable is zero within 'sigma' standard errors. 466 467 Parameters 468 ---------- 469 sigma : int 470 Number of standard errors used for the check. 471 472 Works only properly when the gamma method was run. 473 """ 474 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 475 476 def is_zero(self, atol=1e-10): 477 """Checks whether the observable is zero within a given tolerance. 478 479 Parameters 480 ---------- 481 atol : float 482 Absolute tolerance (for details see numpy documentation). 483 """ 484 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 485 486 def plot_tauint(self, save=None): 487 """Plot integrated autocorrelation time for each ensemble. 488 489 Parameters 490 ---------- 491 save : str 492 saves the figure to a file named 'save' if. 493 """ 494 if not hasattr(self, 'e_dvalue'): 495 raise Exception('Run the gamma method first.') 496 497 for e, e_name in enumerate(self.mc_names): 498 fig = plt.figure() 499 plt.xlabel(r'$W$') 500 plt.ylabel(r'$\tau_\mathrm{int}$') 501 length = int(len(self.e_n_tauint[e_name])) 502 if self.tau_exp[e_name] > 0: 503 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 504 x_help = np.arange(2 * self.tau_exp[e_name]) 505 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 506 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 507 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 508 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 509 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 510 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 511 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 512 else: 513 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 514 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 515 516 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 517 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 518 plt.legend() 519 plt.xlim(-0.5, xmax) 520 ylim = plt.ylim() 521 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 522 plt.draw() 523 if save: 524 fig.savefig(save + "_" + str(e)) 525 526 def plot_rho(self, save=None): 527 """Plot normalized autocorrelation function time for each ensemble. 528 529 Parameters 530 ---------- 531 save : str 532 saves the figure to a file named 'save' if. 533 """ 534 if not hasattr(self, 'e_dvalue'): 535 raise Exception('Run the gamma method first.') 536 for e, e_name in enumerate(self.mc_names): 537 fig = plt.figure() 538 plt.xlabel('W') 539 plt.ylabel('rho') 540 length = int(len(self.e_drho[e_name])) 541 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 542 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 543 if self.tau_exp[e_name] > 0: 544 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 545 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 546 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 547 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 548 else: 549 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 550 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 551 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 552 plt.xlim(-0.5, xmax) 553 plt.draw() 554 if save: 555 fig.savefig(save + "_" + str(e)) 556 557 def plot_rep_dist(self): 558 """Plot replica distribution for each ensemble with more than one replicum.""" 559 if not hasattr(self, 'e_dvalue'): 560 raise Exception('Run the gamma method first.') 561 for e, e_name in enumerate(self.mc_names): 562 if len(self.e_content[e_name]) == 1: 563 print('No replica distribution for a single replicum (', e_name, ')') 564 continue 565 r_length = [] 566 sub_r_mean = 0 567 for r, r_name in enumerate(self.e_content[e_name]): 568 r_length.append(len(self.deltas[r_name])) 569 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 570 e_N = np.sum(r_length) 571 sub_r_mean /= e_N 572 arr = np.zeros(len(self.e_content[e_name])) 573 for r, r_name in enumerate(self.e_content[e_name]): 574 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 575 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 576 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 577 plt.draw() 578 579 def plot_history(self, expand=True): 580 """Plot derived Monte Carlo history for each ensemble 581 582 Parameters 583 ---------- 584 expand : bool 585 show expanded history for irregular Monte Carlo chains (default: True). 586 """ 587 for e, e_name in enumerate(self.mc_names): 588 plt.figure() 589 r_length = [] 590 tmp = [] 591 tmp_expanded = [] 592 for r, r_name in enumerate(self.e_content[e_name]): 593 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 594 if expand: 595 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 596 r_length.append(len(tmp_expanded[-1])) 597 else: 598 r_length.append(len(tmp[-1])) 599 e_N = np.sum(r_length) 600 x = np.arange(e_N) 601 y_test = np.concatenate(tmp, axis=0) 602 if expand: 603 y = np.concatenate(tmp_expanded, axis=0) 604 else: 605 y = y_test 606 plt.errorbar(x, y, fmt='.', markersize=3) 607 plt.xlim(-0.5, e_N - 0.5) 608 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 609 plt.draw() 610 611 def plot_piechart(self, save=None): 612 """Plot piechart which shows the fractional contribution of each 613 ensemble to the error and returns a dictionary containing the fractions. 614 615 Parameters 616 ---------- 617 save : str 618 saves the figure to a file named 'save' if. 619 """ 620 if not hasattr(self, 'e_dvalue'): 621 raise Exception('Run the gamma method first.') 622 if np.isclose(0.0, self._dvalue, atol=1e-15): 623 raise Exception('Error is 0.0') 624 labels = self.e_names 625 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 626 fig1, ax1 = plt.subplots() 627 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 628 ax1.axis('equal') 629 plt.draw() 630 if save: 631 fig1.savefig(save) 632 633 return dict(zip(labels, sizes)) 634 635 def dump(self, filename, datatype="json.gz", description="", **kwargs): 636 """Dump the Obs to a file 'name' of chosen format. 637 638 Parameters 639 ---------- 640 filename : str 641 name of the file to be saved. 642 datatype : str 643 Format of the exported file. Supported formats include 644 "json.gz" and "pickle" 645 description : str 646 Description for output file, only relevant for json.gz format. 647 path : str 648 specifies a custom path for the file (default '.') 649 """ 650 if 'path' in kwargs: 651 file_name = kwargs.get('path') + '/' + filename 652 else: 653 file_name = filename 654 655 if datatype == "json.gz": 656 from .input.json import dump_to_json 657 dump_to_json([self], file_name, description=description) 658 elif datatype == "pickle": 659 with open(file_name + '.p', 'wb') as fb: 660 pickle.dump(self, fb) 661 else: 662 raise Exception("Unknown datatype " + str(datatype)) 663 664 def export_jackknife(self): 665 """Export jackknife samples from the Obs 666 667 Returns 668 ------- 669 numpy.ndarray 670 Returns a numpy array of length N + 1 where N is the number of samples 671 for the given ensemble and replicum. The zeroth entry of the array contains 672 the mean value of the Obs, entries 1 to N contain the N jackknife samples 673 derived from the Obs. The current implementation only works for observables 674 defined on exactly one ensemble and replicum. The derived jackknife samples 675 should agree with samples from a full jackknife analysis up to O(1/N). 676 """ 677 678 if len(self.names) != 1: 679 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 680 681 name = self.names[0] 682 full_data = self.deltas[name] + self.r_values[name] 683 n = full_data.size 684 mean = self.value 685 tmp_jacks = np.zeros(n + 1) 686 tmp_jacks[0] = mean 687 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 688 return tmp_jacks 689 690 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 691 """Export bootstrap samples from the Obs 692 693 Parameters 694 ---------- 695 samples : int 696 Number of bootstrap samples to generate. 697 random_numbers : np.ndarray 698 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 699 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 700 save_rng : str 701 Save the random numbers to a file if a path is specified. 702 703 Returns 704 ------- 705 numpy.ndarray 706 Returns a numpy array of length N + 1 where N is the number of samples 707 for the given ensemble and replicum. The zeroth entry of the array contains 708 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 709 derived from the Obs. The current implementation only works for observables 710 defined on exactly one ensemble and replicum. The derived bootstrap samples 711 should agree with samples from a full bootstrap analysis up to O(1/N). 712 """ 713 if len(self.names) != 1: 714 raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 715 716 name = self.names[0] 717 length = self.N 718 719 if random_numbers is None: 720 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 721 rng = np.random.default_rng(seed) 722 random_numbers = rng.integers(0, length, size=(samples, length)) 723 724 if save_rng is not None: 725 np.savetxt(save_rng, random_numbers, fmt='%i') 726 727 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 728 ret = np.zeros(samples + 1) 729 ret[0] = self.value 730 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 731 return ret 732 733 def __float__(self): 734 return float(self.value) 735 736 def __repr__(self): 737 return 'Obs[' + str(self) + ']' 738 739 def __str__(self): 740 return _format_uncertainty(self.value, self._dvalue) 741 742 def __format__(self, format_type): 743 if format_type == "": 744 significance = 2 745 else: 746 significance = int(float(format_type.replace("+", "").replace("-", ""))) 747 my_str = _format_uncertainty(self.value, self._dvalue, 748 significance=significance) 749 for char in ["+", " "]: 750 if format_type.startswith(char): 751 if my_str[0] != "-": 752 my_str = char + my_str 753 return my_str 754 755 def __hash__(self): 756 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 757 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 758 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 759 hash_tuple += tuple([o.encode() for o in self.names]) 760 m = hashlib.md5() 761 [m.update(o) for o in hash_tuple] 762 return int(m.hexdigest(), 16) & 0xFFFFFFFF 763 764 # Overload comparisons 765 def __lt__(self, other): 766 return self.value < other 767 768 def __le__(self, other): 769 return self.value <= other 770 771 def __gt__(self, other): 772 return self.value > other 773 774 def __ge__(self, other): 775 return self.value >= other 776 777 def __eq__(self, other): 778 if other is None: 779 return False 780 return (self - other).is_zero() 781 782 # Overload math operations 783 def __add__(self, y): 784 if isinstance(y, Obs): 785 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 786 else: 787 if isinstance(y, np.ndarray): 788 return np.array([self + o for o in y]) 789 elif isinstance(y, complex): 790 return CObs(self, 0) + y 791 elif y.__class__.__name__ in ['Corr', 'CObs']: 792 return NotImplemented 793 else: 794 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 795 796 def __radd__(self, y): 797 return self + y 798 799 def __mul__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([self * o for o in y]) 805 elif isinstance(y, complex): 806 return CObs(self * y.real, self * y.imag) 807 elif y.__class__.__name__ in ['Corr', 'CObs']: 808 return NotImplemented 809 else: 810 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 811 812 def __rmul__(self, y): 813 return self * y 814 815 def __sub__(self, y): 816 if isinstance(y, Obs): 817 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 818 else: 819 if isinstance(y, np.ndarray): 820 return np.array([self - o for o in y]) 821 elif y.__class__.__name__ in ['Corr', 'CObs']: 822 return NotImplemented 823 else: 824 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 825 826 def __rsub__(self, y): 827 return -1 * (self - y) 828 829 def __pos__(self): 830 return self 831 832 def __neg__(self): 833 return -1 * self 834 835 def __truediv__(self, y): 836 if isinstance(y, Obs): 837 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 838 else: 839 if isinstance(y, np.ndarray): 840 return np.array([self / o for o in y]) 841 elif y.__class__.__name__ in ['Corr', 'CObs']: 842 return NotImplemented 843 else: 844 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 845 846 def __rtruediv__(self, y): 847 if isinstance(y, Obs): 848 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 849 else: 850 if isinstance(y, np.ndarray): 851 return np.array([o / self for o in y]) 852 elif y.__class__.__name__ in ['Corr', 'CObs']: 853 return NotImplemented 854 else: 855 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 856 857 def __pow__(self, y): 858 if isinstance(y, Obs): 859 return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)]) 860 else: 861 return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)]) 862 863 def __rpow__(self, y): 864 return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)]) 865 866 def __abs__(self): 867 return derived_observable(lambda x: anp.abs(x[0]), [self]) 868 869 # Overload numpy functions 870 def sqrt(self): 871 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 872 873 def log(self): 874 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 875 876 def exp(self): 877 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 878 879 def sin(self): 880 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 881 882 def cos(self): 883 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 884 885 def tan(self): 886 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 887 888 def arcsin(self): 889 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 890 891 def arccos(self): 892 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 893 894 def arctan(self): 895 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 896 897 def sinh(self): 898 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 899 900 def cosh(self): 901 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 902 903 def tanh(self): 904 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 905 906 def arcsinh(self): 907 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 908 909 def arccosh(self): 910 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 911 912 def arctanh(self): 913 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 914 915 916class CObs: 917 """Class for a complex valued observable.""" 918 __slots__ = ['_real', '_imag', 'tag'] 919 920 def __init__(self, real, imag=0.0): 921 self._real = real 922 self._imag = imag 923 self.tag = None 924 925 @property 926 def real(self): 927 return self._real 928 929 @property 930 def imag(self): 931 return self._imag 932 933 def gamma_method(self, **kwargs): 934 """Executes the gamma_method for the real and the imaginary part.""" 935 if isinstance(self.real, Obs): 936 self.real.gamma_method(**kwargs) 937 if isinstance(self.imag, Obs): 938 self.imag.gamma_method(**kwargs) 939 940 def is_zero(self): 941 """Checks whether both real and imaginary part are zero within machine precision.""" 942 return self.real == 0.0 and self.imag == 0.0 943 944 def conjugate(self): 945 return CObs(self.real, -self.imag) 946 947 def __add__(self, other): 948 if isinstance(other, np.ndarray): 949 return other + self 950 elif hasattr(other, 'real') and hasattr(other, 'imag'): 951 return CObs(self.real + other.real, 952 self.imag + other.imag) 953 else: 954 return CObs(self.real + other, self.imag) 955 956 def __radd__(self, y): 957 return self + y 958 959 def __sub__(self, other): 960 if isinstance(other, np.ndarray): 961 return -1 * (other - self) 962 elif hasattr(other, 'real') and hasattr(other, 'imag'): 963 return CObs(self.real - other.real, self.imag - other.imag) 964 else: 965 return CObs(self.real - other, self.imag) 966 967 def __rsub__(self, other): 968 return -1 * (self - other) 969 970 def __mul__(self, other): 971 if isinstance(other, np.ndarray): 972 return other * self 973 elif hasattr(other, 'real') and hasattr(other, 'imag'): 974 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 975 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 976 [self.real, other.real, self.imag, other.imag], 977 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 978 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 979 [self.real, other.real, self.imag, other.imag], 980 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 981 elif getattr(other, 'imag', 0) != 0: 982 return CObs(self.real * other.real - self.imag * other.imag, 983 self.imag * other.real + self.real * other.imag) 984 else: 985 return CObs(self.real * other.real, self.imag * other.real) 986 else: 987 return CObs(self.real * other, self.imag * other) 988 989 def __rmul__(self, other): 990 return self * other 991 992 def __truediv__(self, other): 993 if isinstance(other, np.ndarray): 994 return 1 / (other / self) 995 elif hasattr(other, 'real') and hasattr(other, 'imag'): 996 r = other.real ** 2 + other.imag ** 2 997 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 998 else: 999 return CObs(self.real / other, self.imag / other) 1000 1001 def __rtruediv__(self, other): 1002 r = self.real ** 2 + self.imag ** 2 1003 if hasattr(other, 'real') and hasattr(other, 'imag'): 1004 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 1005 else: 1006 return CObs(self.real * other / r, -self.imag * other / r) 1007 1008 def __abs__(self): 1009 return np.sqrt(self.real**2 + self.imag**2) 1010 1011 def __pos__(self): 1012 return self 1013 1014 def __neg__(self): 1015 return -1 * self 1016 1017 def __eq__(self, other): 1018 return self.real == other.real and self.imag == other.imag 1019 1020 def __str__(self): 1021 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 1022 1023 def __repr__(self): 1024 return 'CObs[' + str(self) + ']' 1025 1026 def __format__(self, format_type): 1027 if format_type == "": 1028 significance = 2 1029 format_type = "2" 1030 else: 1031 significance = int(float(format_type.replace("+", "").replace("-", ""))) 1032 return f"({self.real:{format_type}}{self.imag:+{significance}}j)" 1033 1034 1035def gamma_method(x, **kwargs): 1036 """Vectorized version of the gamma_method applicable to lists or arrays of Obs. 1037 1038 See docstring of pe.Obs.gamma_method for details. 1039 """ 1040 return np.vectorize(lambda o: o.gm(**kwargs))(x) 1041 1042 1043gm = gamma_method 1044 1045 1046def _format_uncertainty(value, dvalue, significance=2): 1047 """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" 1048 if dvalue == 0.0 or (not np.isfinite(dvalue)): 1049 return str(value) 1050 if not isinstance(significance, int): 1051 raise TypeError("significance needs to be an integer.") 1052 if significance < 1: 1053 raise ValueError("significance needs to be larger than zero.") 1054 fexp = np.floor(np.log10(dvalue)) 1055 if fexp < 0.0: 1056 return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f') 1057 elif fexp == 0.0: 1058 return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})" 1059 else: 1060 return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})" 1061 1062 1063def _expand_deltas(deltas, idx, shape, gapsize): 1064 """Expand deltas defined on idx to a regular range with spacing gapsize between two 1065 configurations and where holes are filled by 0. 1066 If idx is of type range, the deltas are not changed if the idx.step == gapsize. 1067 1068 Parameters 1069 ---------- 1070 deltas : list 1071 List of fluctuations 1072 idx : list 1073 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 1074 shape : int 1075 Number of configs in idx. 1076 gapsize : int 1077 The target distance between two configurations. If longer distances 1078 are found in idx, the data is expanded. 1079 """ 1080 if isinstance(idx, range): 1081 if (idx.step == gapsize): 1082 return deltas 1083 ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize) 1084 for i in range(shape): 1085 ret[(idx[i] - idx[0]) // gapsize] = deltas[i] 1086 return ret 1087 1088 1089def _merge_idx(idl): 1090 """Returns the union of all lists in idl as range or sorted list 1091 1092 Parameters 1093 ---------- 1094 idl : list 1095 List of lists or ranges. 1096 """ 1097 1098 if _check_lists_equal(idl): 1099 return idl[0] 1100 1101 idunion = sorted(set().union(*idl)) 1102 1103 # Check whether idunion can be expressed as range 1104 idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0]) 1105 idtest = [list(idrange), idunion] 1106 if _check_lists_equal(idtest): 1107 return idrange 1108 1109 return idunion 1110 1111 1112def _intersection_idx(idl): 1113 """Returns the intersection of all lists in idl as range or sorted list 1114 1115 Parameters 1116 ---------- 1117 idl : list 1118 List of lists or ranges. 1119 """ 1120 1121 if _check_lists_equal(idl): 1122 return idl[0] 1123 1124 idinter = sorted(set.intersection(*[set(o) for o in idl])) 1125 1126 # Check whether idinter can be expressed as range 1127 try: 1128 idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0]) 1129 idtest = [list(idrange), idinter] 1130 if _check_lists_equal(idtest): 1131 return idrange 1132 except IndexError: 1133 pass 1134 1135 return idinter 1136 1137 1138def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor): 1139 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1140 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1141 common divisor of the step sizes is used as new step size. 1142 1143 Parameters 1144 ---------- 1145 deltas : list 1146 List of fluctuations 1147 idx : list 1148 List or range of configs on which the deltas are defined. 1149 Has to be a subset of new_idx and has to be sorted in ascending order. 1150 shape : list 1151 Number of configs in idx. 1152 new_idx : list 1153 List of configs that defines the new range, has to be sorted in ascending order. 1154 scalefactor : float 1155 An additional scaling factor that can be applied to scale the fluctuations, 1156 e.g., when Obs with differing numbers of replica are merged. 1157 """ 1158 if type(idx) is range and type(new_idx) is range: 1159 if idx == new_idx: 1160 if scalefactor == 1: 1161 return deltas 1162 else: 1163 return deltas * scalefactor 1164 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1165 for i in range(shape): 1166 ret[idx[i] - new_idx[0]] = deltas[i] 1167 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor 1168 1169 1170def derived_observable(func, data, array_mode=False, **kwargs): 1171 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1172 1173 Parameters 1174 ---------- 1175 func : object 1176 arbitrary function of the form func(data, **kwargs). For the 1177 automatic differentiation to work, all numpy functions have to have 1178 the autograd wrapper (use 'import autograd.numpy as anp'). 1179 data : list 1180 list of Obs, e.g. [obs1, obs2, obs3]. 1181 num_grad : bool 1182 if True, numerical derivatives are used instead of autograd 1183 (default False). To control the numerical differentiation the 1184 kwargs of numdifftools.step_generators.MaxStepGenerator 1185 can be used. 1186 man_grad : list 1187 manually supply a list or an array which contains the jacobian 1188 of func. Use cautiously, supplying the wrong derivative will 1189 not be intercepted. 1190 1191 Notes 1192 ----- 1193 For simple mathematical operations it can be practical to use anonymous 1194 functions. For the ratio of two observables one can e.g. use 1195 1196 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1197 """ 1198 1199 data = np.asarray(data) 1200 raveled_data = data.ravel() 1201 1202 # Workaround for matrix operations containing non Obs data 1203 if not all(isinstance(x, Obs) for x in raveled_data): 1204 for i in range(len(raveled_data)): 1205 if isinstance(raveled_data[i], (int, float)): 1206 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1207 1208 allcov = {} 1209 for o in raveled_data: 1210 for name in o.cov_names: 1211 if name in allcov: 1212 if not np.allclose(allcov[name], o.covobs[name].cov): 1213 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1214 else: 1215 allcov[name] = o.covobs[name].cov 1216 1217 n_obs = len(raveled_data) 1218 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1219 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1220 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1221 1222 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1223 1224 if data.ndim == 1: 1225 values = np.array([o.value for o in data]) 1226 else: 1227 values = np.vectorize(lambda x: x.value)(data) 1228 1229 new_values = func(values, **kwargs) 1230 1231 multi = int(isinstance(new_values, np.ndarray)) 1232 1233 new_r_values = {} 1234 new_idl_d = {} 1235 for name in new_sample_names: 1236 idl = [] 1237 tmp_values = np.zeros(n_obs) 1238 for i, item in enumerate(raveled_data): 1239 tmp_values[i] = item.r_values.get(name, item.value) 1240 tmp_idl = item.idl.get(name) 1241 if tmp_idl is not None: 1242 idl.append(tmp_idl) 1243 if multi > 0: 1244 tmp_values = np.array(tmp_values).reshape(data.shape) 1245 new_r_values[name] = func(tmp_values, **kwargs) 1246 new_idl_d[name] = _merge_idx(idl) 1247 1248 def _compute_scalefactor_missing_rep(obs): 1249 """ 1250 Computes the scale factor that is to be multiplied with the deltas 1251 in the case where Obs with different subsets of replica are merged. 1252 Returns a dictionary with the scale factor for each Monte Carlo name. 1253 1254 Parameters 1255 ---------- 1256 obs : Obs 1257 The observable corresponding to the deltas that are to be scaled 1258 """ 1259 scalef_d = {} 1260 for mc_name in obs.mc_names: 1261 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] 1262 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] 1263 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): 1264 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) 1265 return scalef_d 1266 1267 if 'man_grad' in kwargs: 1268 deriv = np.asarray(kwargs.get('man_grad')) 1269 if new_values.shape + data.shape != deriv.shape: 1270 raise Exception('Manual derivative does not have correct shape.') 1271 elif kwargs.get('num_grad') is True: 1272 if multi > 0: 1273 raise Exception('Multi mode currently not supported for numerical derivative') 1274 options = { 1275 'base_step': 0.1, 1276 'step_ratio': 2.5} 1277 for key in options.keys(): 1278 kwarg = kwargs.get(key) 1279 if kwarg is not None: 1280 options[key] = kwarg 1281 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1282 if tmp_df.size == 1: 1283 deriv = np.array([tmp_df.real]) 1284 else: 1285 deriv = tmp_df.real 1286 else: 1287 deriv = jacobian(func)(values, **kwargs) 1288 1289 final_result = np.zeros(new_values.shape, dtype=object) 1290 1291 if array_mode is True: 1292 1293 class _Zero_grad(): 1294 def __init__(self, N): 1295 self.grad = np.zeros((N, 1)) 1296 1297 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1298 d_extracted = {} 1299 g_extracted = {} 1300 for name in new_sample_names: 1301 d_extracted[name] = [] 1302 ens_length = len(new_idl_d[name]) 1303 for i_dat, dat in enumerate(data): 1304 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1305 for name in new_cov_names: 1306 g_extracted[name] = [] 1307 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1308 for i_dat, dat in enumerate(data): 1309 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1310 1311 for i_val, new_val in np.ndenumerate(new_values): 1312 new_deltas = {} 1313 new_grad = {} 1314 if array_mode is True: 1315 for name in new_sample_names: 1316 ens_length = d_extracted[name][0].shape[-1] 1317 new_deltas[name] = np.zeros(ens_length) 1318 for i_dat, dat in enumerate(d_extracted[name]): 1319 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1320 for name in new_cov_names: 1321 new_grad[name] = 0 1322 for i_dat, dat in enumerate(g_extracted[name]): 1323 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1324 else: 1325 for j_obs, obs in np.ndenumerate(data): 1326 scalef_d = _compute_scalefactor_missing_rep(obs) 1327 for name in obs.names: 1328 if name in obs.cov_names: 1329 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1330 else: 1331 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) 1332 1333 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1334 1335 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1336 raise Exception('The same name has been used for deltas and covobs!') 1337 new_samples = [] 1338 new_means = [] 1339 new_idl = [] 1340 new_names_obs = [] 1341 for name in new_names: 1342 if name not in new_covobs: 1343 new_samples.append(new_deltas[name]) 1344 new_idl.append(new_idl_d[name]) 1345 new_means.append(new_r_values[name][i_val]) 1346 new_names_obs.append(name) 1347 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1348 for name in new_covobs: 1349 final_result[i_val].names.append(name) 1350 final_result[i_val]._covobs = new_covobs 1351 final_result[i_val]._value = new_val 1352 final_result[i_val].reweighted = reweighted 1353 1354 if multi == 0: 1355 final_result = final_result.item() 1356 1357 return final_result 1358 1359 1360def _reduce_deltas(deltas, idx_old, idx_new): 1361 """Extract deltas defined on idx_old on all configs of idx_new. 1362 1363 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1364 are ordered in an ascending order. 1365 1366 Parameters 1367 ---------- 1368 deltas : list 1369 List of fluctuations 1370 idx_old : list 1371 List or range of configs on which the deltas are defined 1372 idx_new : list 1373 List of configs for which we want to extract the deltas. 1374 Has to be a subset of idx_old. 1375 """ 1376 if not len(deltas) == len(idx_old): 1377 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1378 if type(idx_old) is range and type(idx_new) is range: 1379 if idx_old == idx_new: 1380 return deltas 1381 if _check_lists_equal([idx_old, idx_new]): 1382 return deltas 1383 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1384 if len(indices) < len(idx_new): 1385 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') 1386 return np.array(deltas)[indices] 1387 1388 1389def reweight(weight, obs, **kwargs): 1390 """Reweight a list of observables. 1391 1392 Parameters 1393 ---------- 1394 weight : Obs 1395 Reweighting factor. An Observable that has to be defined on a superset of the 1396 configurations in obs[i].idl for all i. 1397 obs : list 1398 list of Obs, e.g. [obs1, obs2, obs3]. 1399 all_configs : bool 1400 if True, the reweighted observables are normalized by the average of 1401 the reweighting factor on all configurations in weight.idl and not 1402 on the configurations in obs[i].idl. Default False. 1403 """ 1404 result = [] 1405 for i in range(len(obs)): 1406 if len(obs[i].cov_names): 1407 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1408 if not set(obs[i].names).issubset(weight.names): 1409 raise Exception('Error: Ensembles do not fit') 1410 for name in obs[i].names: 1411 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1412 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1413 new_samples = [] 1414 w_deltas = {} 1415 for name in sorted(obs[i].names): 1416 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1417 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1418 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1419 1420 if kwargs.get('all_configs'): 1421 new_weight = weight 1422 else: 1423 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1424 1425 result.append(tmp_obs / new_weight) 1426 result[-1].reweighted = True 1427 1428 return result 1429 1430 1431def correlate(obs_a, obs_b): 1432 """Correlate two observables. 1433 1434 Parameters 1435 ---------- 1436 obs_a : Obs 1437 First observable 1438 obs_b : Obs 1439 Second observable 1440 1441 Notes 1442 ----- 1443 Keep in mind to only correlate primary observables which have not been reweighted 1444 yet. The reweighting has to be applied after correlating the observables. 1445 Currently only works if ensembles are identical (this is not strictly necessary). 1446 """ 1447 1448 if sorted(obs_a.names) != sorted(obs_b.names): 1449 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1450 if len(obs_a.cov_names) or len(obs_b.cov_names): 1451 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1452 for name in obs_a.names: 1453 if obs_a.shape[name] != obs_b.shape[name]: 1454 raise Exception('Shapes of ensemble', name, 'do not fit') 1455 if obs_a.idl[name] != obs_b.idl[name]: 1456 raise Exception('idl of ensemble', name, 'do not fit') 1457 1458 if obs_a.reweighted is True: 1459 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1460 if obs_b.reweighted is True: 1461 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1462 1463 new_samples = [] 1464 new_idl = [] 1465 for name in sorted(obs_a.names): 1466 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1467 new_idl.append(obs_a.idl[name]) 1468 1469 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1470 o.reweighted = obs_a.reweighted or obs_b.reweighted 1471 return o 1472 1473 1474def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1475 r'''Calculates the error covariance matrix of a set of observables. 1476 1477 WARNING: This function should be used with care, especially for observables with support on multiple 1478 ensembles with differing autocorrelations. See the notes below for details. 1479 1480 The gamma method has to be applied first to all observables. 1481 1482 Parameters 1483 ---------- 1484 obs : list or numpy.ndarray 1485 List or one dimensional array of Obs 1486 visualize : bool 1487 If True plots the corresponding normalized correlation matrix (default False). 1488 correlation : bool 1489 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1490 smooth : None or int 1491 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1492 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1493 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1494 small ones. 1495 1496 Notes 1497 ----- 1498 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1499 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1500 in the absence of autocorrelation. 1501 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1502 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1503 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1504 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1505 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1506 ''' 1507 1508 length = len(obs) 1509 1510 max_samples = np.max([o.N for o in obs]) 1511 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1512 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1513 1514 cov = np.zeros((length, length)) 1515 for i in range(length): 1516 for j in range(i, length): 1517 cov[i, j] = _covariance_element(obs[i], obs[j]) 1518 cov = cov + cov.T - np.diag(np.diag(cov)) 1519 1520 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1521 1522 if isinstance(smooth, int): 1523 corr = _smooth_eigenvalues(corr, smooth) 1524 1525 if visualize: 1526 plt.matshow(corr, vmin=-1, vmax=1) 1527 plt.set_cmap('RdBu') 1528 plt.colorbar() 1529 plt.draw() 1530 1531 if correlation is True: 1532 return corr 1533 1534 errors = [o.dvalue for o in obs] 1535 cov = np.diag(errors) @ corr @ np.diag(errors) 1536 1537 eigenvalues = np.linalg.eigh(cov)[0] 1538 if not np.all(eigenvalues >= 0): 1539 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1540 1541 return cov 1542 1543 1544def invert_corr_cov_cholesky(corr, inverrdiag): 1545 """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr` 1546 and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`. 1547 1548 Parameters 1549 ---------- 1550 corr : np.ndarray 1551 correlation matrix 1552 inverrdiag : np.ndarray 1553 diagonal matrix, the entries are the inverse errors of the data points considered 1554 """ 1555 1556 condn = np.linalg.cond(corr) 1557 if condn > 0.1 / np.finfo(float).eps: 1558 raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})") 1559 if condn > 1e13: 1560 warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning) 1561 chol = np.linalg.cholesky(corr) 1562 chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True) 1563 1564 return chol_inv 1565 1566 1567def sort_corr(corr, kl, yd): 1568 """ Reorders a correlation matrix to match the alphabetical order of its underlying y data. 1569 1570 The ordering of the input correlation matrix `corr` is given by the list of keys `kl`. 1571 The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data 1572 that the correlation matrix is based on. 1573 This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr` 1574 according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds 1575 to the y data `yd` when arranged in an alphabetical order by its keys. 1576 1577 Parameters 1578 ---------- 1579 corr : np.ndarray 1580 A square correlation matrix constructed using the order of the y data specified by `kl`. 1581 The dimensions of `corr` should match the total number of y data points in `yd` combined. 1582 kl : list of str 1583 A list of keys that denotes the order in which the y data from `yd` was used to build the 1584 input correlation matrix `corr`. 1585 yd : dict of list 1586 A dictionary where each key corresponds to a unique identifier, and its value is a list of 1587 y data points. The total number of y data points across all keys must match the dimensions 1588 of `corr`. The lists in the dictionary can be lists of Obs. 1589 1590 Returns 1591 ------- 1592 np.ndarray 1593 A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys. 1594 1595 Example 1596 ------- 1597 >>> import numpy as np 1598 >>> import pyerrors as pe 1599 >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]]) 1600 >>> kl = ['b', 'a'] 1601 >>> yd = {'a': [1, 2], 'b': [3]} 1602 >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd) 1603 >>> print(sorted_corr) 1604 array([[1. , 0.3, 0.4], 1605 [0.3, 1. , 0.2], 1606 [0.4, 0.2, 1. ]]) 1607 1608 """ 1609 kl_sorted = sorted(kl) 1610 1611 posd = {} 1612 ofs = 0 1613 for ki, k in enumerate(kl): 1614 posd[k] = [i + ofs for i in range(len(yd[k]))] 1615 ofs += len(posd[k]) 1616 1617 mapping = [] 1618 for k in kl_sorted: 1619 for i in range(len(yd[k])): 1620 mapping.append(posd[k][i]) 1621 1622 corr_sorted = np.zeros_like(corr) 1623 for i in range(corr.shape[0]): 1624 for j in range(corr.shape[0]): 1625 corr_sorted[i][j] = corr[mapping[i]][mapping[j]] 1626 1627 return corr_sorted 1628 1629 1630def _smooth_eigenvalues(corr, E): 1631 """Eigenvalue smoothing as described in hep-lat/9412087 1632 1633 corr : np.ndarray 1634 correlation matrix 1635 E : integer 1636 Number of eigenvalues to be left substantially unchanged 1637 """ 1638 if not (2 < E < corr.shape[0] - 1): 1639 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1640 vals, vec = np.linalg.eigh(corr) 1641 lambda_min = np.mean(vals[:-E]) 1642 vals[vals < lambda_min] = lambda_min 1643 vals /= np.mean(vals) 1644 return vec @ np.diag(vals) @ vec.T 1645 1646 1647def _covariance_element(obs1, obs2): 1648 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1649 1650 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1651 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) 1652 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) 1653 return np.sum(deltas1 * deltas2) 1654 1655 if set(obs1.names).isdisjoint(set(obs2.names)): 1656 return 0.0 1657 1658 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1659 raise Exception('The gamma method has to be applied to both Obs first.') 1660 1661 dvalue = 0.0 1662 1663 for e_name in obs1.mc_names: 1664 1665 if e_name not in obs2.mc_names: 1666 continue 1667 1668 idl_d = {} 1669 for r_name in obs1.e_content[e_name]: 1670 if r_name not in obs2.e_content[e_name]: 1671 continue 1672 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1673 1674 gamma = 0.0 1675 1676 for r_name in obs1.e_content[e_name]: 1677 if r_name not in obs2.e_content[e_name]: 1678 continue 1679 if len(idl_d[r_name]) == 0: 1680 continue 1681 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1682 1683 if gamma == 0.0: 1684 continue 1685 1686 gamma_div = 0.0 1687 for r_name in obs1.e_content[e_name]: 1688 if r_name not in obs2.e_content[e_name]: 1689 continue 1690 if len(idl_d[r_name]) == 0: 1691 continue 1692 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1693 gamma /= gamma_div 1694 1695 dvalue += gamma 1696 1697 for e_name in obs1.cov_names: 1698 1699 if e_name not in obs2.cov_names: 1700 continue 1701 1702 dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item() 1703 1704 return dvalue 1705 1706 1707def import_jackknife(jacks, name, idl=None): 1708 """Imports jackknife samples and returns an Obs 1709 1710 Parameters 1711 ---------- 1712 jacks : numpy.ndarray 1713 numpy array containing the mean value as zeroth entry and 1714 the N jackknife samples as first to Nth entry. 1715 name : str 1716 name of the ensemble the samples are defined on. 1717 """ 1718 length = len(jacks) - 1 1719 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1720 samples = jacks[1:] @ prj 1721 mean = np.mean(samples) 1722 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1723 new_obs._value = jacks[0] 1724 return new_obs 1725 1726 1727def import_bootstrap(boots, name, random_numbers): 1728 """Imports bootstrap samples and returns an Obs 1729 1730 Parameters 1731 ---------- 1732 boots : numpy.ndarray 1733 numpy array containing the mean value as zeroth entry and 1734 the N bootstrap samples as first to Nth entry. 1735 name : str 1736 name of the ensemble the samples are defined on. 1737 random_numbers : np.ndarray 1738 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, 1739 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo 1740 chain to be reconstructed. 1741 """ 1742 samples, length = random_numbers.shape 1743 if samples != len(boots) - 1: 1744 raise ValueError("Random numbers do not have the correct shape.") 1745 1746 if samples < length: 1747 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1748 1749 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1750 1751 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1752 ret = Obs([samples], [name]) 1753 ret._value = boots[0] 1754 return ret 1755 1756 1757def merge_obs(list_of_obs): 1758 """Combine all observables in list_of_obs into one new observable 1759 1760 Parameters 1761 ---------- 1762 list_of_obs : list 1763 list of the Obs object to be combined 1764 1765 Notes 1766 ----- 1767 It is not possible to combine obs which are based on the same replicum 1768 """ 1769 replist = [item for obs in list_of_obs for item in obs.names] 1770 if (len(replist) == len(set(replist))) is False: 1771 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1772 if any([len(o.cov_names) for o in list_of_obs]): 1773 raise Exception('Not possible to merge data that contains covobs!') 1774 new_dict = {} 1775 idl_dict = {} 1776 for o in list_of_obs: 1777 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1778 for key in set(o.deltas) | set(o.r_values)}) 1779 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1780 1781 names = sorted(new_dict.keys()) 1782 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1783 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1784 return o 1785 1786 1787def cov_Obs(means, cov, name, grad=None): 1788 """Create an Obs based on mean(s) and a covariance matrix 1789 1790 Parameters 1791 ---------- 1792 mean : list of floats or float 1793 N mean value(s) of the new Obs 1794 cov : list or array 1795 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1796 name : str 1797 identifier for the covariance matrix 1798 grad : list or array 1799 Gradient of the Covobs wrt. the means belonging to cov. 1800 """ 1801 1802 def covobs_to_obs(co): 1803 """Make an Obs out of a Covobs 1804 1805 Parameters 1806 ---------- 1807 co : Covobs 1808 Covobs to be embedded into the Obs 1809 """ 1810 o = Obs([], [], means=[]) 1811 o._value = co.value 1812 o.names.append(co.name) 1813 o._covobs[co.name] = co 1814 o._dvalue = np.sqrt(co.errsq()) 1815 return o 1816 1817 ol = [] 1818 if isinstance(means, (float, int)): 1819 means = [means] 1820 1821 for i in range(len(means)): 1822 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1823 if ol[0].covobs[name].N != len(means): 1824 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1825 if len(ol) == 1: 1826 return ol[0] 1827 return ol 1828 1829 1830def _determine_gap(o, e_content, e_name): 1831 gaps = [] 1832 for r_name in e_content[e_name]: 1833 if isinstance(o.idl[r_name], range): 1834 gaps.append(o.idl[r_name].step) 1835 else: 1836 gaps.append(np.min(np.diff(o.idl[r_name]))) 1837 1838 gap = min(gaps) 1839 if not np.all([gi % gap == 0 for gi in gaps]): 1840 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) 1841 1842 return gap 1843 1844 1845def _check_lists_equal(idl): 1846 ''' 1847 Use groupby to efficiently check whether all elements of idl are identical. 1848 Returns True if all elements are equal, otherwise False. 1849 1850 Parameters 1851 ---------- 1852 idl : list of lists, ranges or np.ndarrays 1853 ''' 1854 g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl]) 1855 if next(g, True) and not next(g, False): 1856 return True 1857 return False
19class Obs: 20 """Class for a general observable. 21 22 Instances of Obs are the basic objects of a pyerrors error analysis. 23 They are initialized with a list which contains arrays of samples for 24 different ensembles/replica and another list of same length which contains 25 the names of the ensembles/replica. Mathematical operations can be 26 performed on instances. The result is another instance of Obs. The error of 27 an instance can be computed with the gamma_method. Also contains additional 28 methods for output and visualization of the error calculation. 29 30 Attributes 31 ---------- 32 S_global : float 33 Standard value for S (default 2.0) 34 S_dict : dict 35 Dictionary for S values. If an entry for a given ensemble 36 exists this overwrites the standard value for that ensemble. 37 tau_exp_global : float 38 Standard value for tau_exp (default 0.0) 39 tau_exp_dict : dict 40 Dictionary for tau_exp values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 N_sigma_global : float 43 Standard value for N_sigma (default 1.0) 44 N_sigma_dict : dict 45 Dictionary for N_sigma values. If an entry for a given ensemble exists 46 this overwrites the standard value for that ensemble. 47 """ 48 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 49 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 50 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 51 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 52 'idl', 'tag', '_covobs', '__dict__'] 53 54 S_global = 2.0 55 S_dict = {} 56 tau_exp_global = 0.0 57 tau_exp_dict = {} 58 N_sigma_global = 1.0 59 N_sigma_dict = {} 60 61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise ValueError('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise ValueError('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise ValueError('Names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 else: 87 if not isinstance(names[0], str): 88 raise TypeError('All names have to be strings.') 89 if min(len(x) for x in samples) <= 4: 90 raise ValueError('Samples have to have at least 5 entries.') 91 92 self.names = sorted(names) 93 self.shape = {} 94 self.r_values = {} 95 self.deltas = {} 96 self._covobs = {} 97 98 self._value = 0 99 self.N = 0 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]]))) 109 elif np.any(dc == 0): 110 raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]]))) 111 if len(dc) == 1: 112 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 113 else: 114 self.idl[name] = list(idx) 115 else: 116 raise TypeError('incompatible type for idl[%s].' % (name)) 117 else: 118 for name, sample in sorted(zip(names, samples)): 119 self.idl[name] = range(1, len(sample) + 1) 120 121 if kwargs.get("means") is not None: 122 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 self.r_values[name] = mean 126 self.deltas[name] = sample 127 else: 128 for name, sample in sorted(zip(names, samples)): 129 self.shape[name] = len(self.idl[name]) 130 self.N += self.shape[name] 131 if len(sample) != self.shape[name]: 132 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 133 self.r_values[name] = np.mean(sample) 134 self.deltas[name] = sample - self.r_values[name] 135 self._value += self.shape[name] * self.r_values[name] 136 self._value /= self.N 137 138 self._dvalue = 0.0 139 self.ddvalue = 0.0 140 self.reweighted = False 141 142 self.tag = None 143 144 @property 145 def value(self): 146 return self._value 147 148 @property 149 def dvalue(self): 150 return self._dvalue 151 152 @property 153 def e_names(self): 154 return sorted(set([o.split('|')[0] for o in self.names])) 155 156 @property 157 def cov_names(self): 158 return sorted(set([o for o in self.covobs.keys()])) 159 160 @property 161 def mc_names(self): 162 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 163 164 @property 165 def e_content(self): 166 res = {} 167 for e, e_name in enumerate(self.e_names): 168 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 169 if e_name in self.names: 170 res[e_name].append(e_name) 171 return res 172 173 @property 174 def covobs(self): 175 return self._covobs 176 177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimates coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True) 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 gapsize = _determine_gap(self, e_content, e_name) 244 245 r_length = [] 246 for r_name in e_content[e_name]: 247 if isinstance(self.idl[r_name], range): 248 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 249 else: 250 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 251 252 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 253 w_max = max(r_length) // 2 254 e_gamma[e_name] = np.zeros(w_max) 255 self.e_rho[e_name] = np.zeros(w_max) 256 self.e_drho[e_name] = np.zeros(w_max) 257 258 for r_name in e_content[e_name]: 259 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 260 261 gamma_div = np.zeros(w_max) 262 for r_name in e_content[e_name]: 263 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 264 gamma_div[gamma_div < 1] = 1.0 265 e_gamma[e_name] /= gamma_div[:w_max] 266 267 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 268 self.e_tauint[e_name] = 0.5 269 self.e_dtauint[e_name] = 0.0 270 self.e_dvalue[e_name] = 0.0 271 self.e_ddvalue[e_name] = 0.0 272 self.e_windowsize[e_name] = 0 273 continue 274 275 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 276 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 277 # Make sure no entry of tauint is smaller than 0.5 278 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 279 # hep-lat/0306017 eq. (42) 280 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 281 self.e_n_dtauint[e_name][0] = 0.0 282 283 def _compute_drho(i): 284 tmp = (self.e_rho[e_name][i + 1:w_max] 285 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 286 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 287 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 288 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 289 290 if self.tau_exp[e_name] > 0: 291 _compute_drho(1) 292 texp = self.tau_exp[e_name] 293 # Critical slowing down analysis 294 if w_max // 2 <= 1: 295 raise Exception("Need at least 8 samples for tau_exp error analysis") 296 for n in range(1, w_max // 2): 297 _compute_drho(n + 1) 298 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 299 # Bias correction hep-lat/0306017 eq. (49) included 300 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 301 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 302 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 303 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 305 self.e_windowsize[e_name] = n 306 break 307 else: 308 if self.S[e_name] == 0.0: 309 self.e_tauint[e_name] = 0.5 310 self.e_dtauint[e_name] = 0.0 311 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 313 self.e_windowsize[e_name] = 0 314 else: 315 # Standard automatic windowing procedure 316 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 317 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 318 for n in range(1, w_max): 319 if g_w[n - 1] < 0 or n >= w_max - 1: 320 _compute_drho(n) 321 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 322 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 323 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 324 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 325 self.e_windowsize[e_name] = n 326 break 327 328 self._dvalue += self.e_dvalue[e_name] ** 2 329 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 330 331 for e_name in self.cov_names: 332 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 333 self.e_ddvalue[e_name] = 0 334 self._dvalue += self.e_dvalue[e_name]**2 335 336 self._dvalue = np.sqrt(self._dvalue) 337 if self._dvalue == 0.0: 338 self.ddvalue = 0.0 339 else: 340 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 341 return 342 343 gm = gamma_method 344 345 def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize): 346 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 347 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 348 349 Parameters 350 ---------- 351 deltas : list 352 List of fluctuations 353 idx : list 354 List or range of configurations on which the deltas are defined. 355 shape : int 356 Number of configurations in idx. 357 w_max : int 358 Upper bound for the summation window. 359 fft : bool 360 determines whether the fft algorithm is used for the computation 361 of the autocorrelation function. 362 gapsize : int 363 The target distance between two configurations. If longer distances 364 are found in idx, the data is expanded. 365 """ 366 gamma = np.zeros(w_max) 367 deltas = _expand_deltas(deltas, idx, shape, gapsize) 368 new_shape = len(deltas) 369 if fft: 370 max_gamma = min(new_shape, w_max) 371 # The padding for the fft has to be even 372 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 373 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 374 else: 375 for n in range(w_max): 376 if new_shape - n >= 0: 377 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 378 379 return gamma 380 381 def details(self, ens_content=True): 382 """Output detailed properties of the Obs. 383 384 Parameters 385 ---------- 386 ens_content : bool 387 print details about the ensembles and replica if true. 388 """ 389 if self.tag is not None: 390 print("Description:", self.tag) 391 if not hasattr(self, 'e_dvalue'): 392 print('Result\t %3.8e' % (self.value)) 393 else: 394 if self.value == 0.0: 395 percentage = np.nan 396 else: 397 percentage = np.abs(self._dvalue / self.value) * 100 398 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 399 if len(self.e_names) > 1: 400 print(' Ensemble errors:') 401 e_content = self.e_content 402 for e_name in self.mc_names: 403 gap = _determine_gap(self, e_content, e_name) 404 405 if len(self.e_names) > 1: 406 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 407 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 408 tau_string += f" in units of {gap} config" 409 if gap > 1: 410 tau_string += "s" 411 if self.tau_exp[e_name] > 0: 412 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 413 else: 414 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 415 print(tau_string) 416 for e_name in self.cov_names: 417 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 418 if ens_content is True: 419 if len(self.e_names) == 1: 420 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 421 else: 422 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 423 my_string_list = [] 424 for key, value in sorted(self.e_content.items()): 425 if key not in self.covobs: 426 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 427 if len(value) == 1: 428 my_string += f': {self.shape[value[0]]} configurations' 429 if isinstance(self.idl[value[0]], range): 430 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 431 else: 432 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 433 else: 434 sublist = [] 435 for v in value: 436 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 437 my_substring += f': {self.shape[v]} configurations' 438 if isinstance(self.idl[v], range): 439 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 440 else: 441 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 442 sublist.append(my_substring) 443 444 my_string += '\n' + '\n'.join(sublist) 445 else: 446 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 447 my_string_list.append(my_string) 448 print('\n'.join(my_string_list)) 449 450 def reweight(self, weight): 451 """Reweight the obs with given rewighting factors. 452 453 Parameters 454 ---------- 455 weight : Obs 456 Reweighting factor. An Observable that has to be defined on a superset of the 457 configurations in obs[i].idl for all i. 458 all_configs : bool 459 if True, the reweighted observables are normalized by the average of 460 the reweighting factor on all configurations in weight.idl and not 461 on the configurations in obs[i].idl. Default False. 462 """ 463 return reweight(weight, [self])[0] 464 465 def is_zero_within_error(self, sigma=1): 466 """Checks whether the observable is zero within 'sigma' standard errors. 467 468 Parameters 469 ---------- 470 sigma : int 471 Number of standard errors used for the check. 472 473 Works only properly when the gamma method was run. 474 """ 475 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 476 477 def is_zero(self, atol=1e-10): 478 """Checks whether the observable is zero within a given tolerance. 479 480 Parameters 481 ---------- 482 atol : float 483 Absolute tolerance (for details see numpy documentation). 484 """ 485 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 486 487 def plot_tauint(self, save=None): 488 """Plot integrated autocorrelation time for each ensemble. 489 490 Parameters 491 ---------- 492 save : str 493 saves the figure to a file named 'save' if. 494 """ 495 if not hasattr(self, 'e_dvalue'): 496 raise Exception('Run the gamma method first.') 497 498 for e, e_name in enumerate(self.mc_names): 499 fig = plt.figure() 500 plt.xlabel(r'$W$') 501 plt.ylabel(r'$\tau_\mathrm{int}$') 502 length = int(len(self.e_n_tauint[e_name])) 503 if self.tau_exp[e_name] > 0: 504 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 505 x_help = np.arange(2 * self.tau_exp[e_name]) 506 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 507 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 508 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 509 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 510 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 511 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 512 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 513 else: 514 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 515 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 516 517 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 518 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 519 plt.legend() 520 plt.xlim(-0.5, xmax) 521 ylim = plt.ylim() 522 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 523 plt.draw() 524 if save: 525 fig.savefig(save + "_" + str(e)) 526 527 def plot_rho(self, save=None): 528 """Plot normalized autocorrelation function time for each ensemble. 529 530 Parameters 531 ---------- 532 save : str 533 saves the figure to a file named 'save' if. 534 """ 535 if not hasattr(self, 'e_dvalue'): 536 raise Exception('Run the gamma method first.') 537 for e, e_name in enumerate(self.mc_names): 538 fig = plt.figure() 539 plt.xlabel('W') 540 plt.ylabel('rho') 541 length = int(len(self.e_drho[e_name])) 542 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 543 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 544 if self.tau_exp[e_name] > 0: 545 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 546 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 547 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 548 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 549 else: 550 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 551 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 552 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 553 plt.xlim(-0.5, xmax) 554 plt.draw() 555 if save: 556 fig.savefig(save + "_" + str(e)) 557 558 def plot_rep_dist(self): 559 """Plot replica distribution for each ensemble with more than one replicum.""" 560 if not hasattr(self, 'e_dvalue'): 561 raise Exception('Run the gamma method first.') 562 for e, e_name in enumerate(self.mc_names): 563 if len(self.e_content[e_name]) == 1: 564 print('No replica distribution for a single replicum (', e_name, ')') 565 continue 566 r_length = [] 567 sub_r_mean = 0 568 for r, r_name in enumerate(self.e_content[e_name]): 569 r_length.append(len(self.deltas[r_name])) 570 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 571 e_N = np.sum(r_length) 572 sub_r_mean /= e_N 573 arr = np.zeros(len(self.e_content[e_name])) 574 for r, r_name in enumerate(self.e_content[e_name]): 575 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 576 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 577 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 578 plt.draw() 579 580 def plot_history(self, expand=True): 581 """Plot derived Monte Carlo history for each ensemble 582 583 Parameters 584 ---------- 585 expand : bool 586 show expanded history for irregular Monte Carlo chains (default: True). 587 """ 588 for e, e_name in enumerate(self.mc_names): 589 plt.figure() 590 r_length = [] 591 tmp = [] 592 tmp_expanded = [] 593 for r, r_name in enumerate(self.e_content[e_name]): 594 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 595 if expand: 596 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 597 r_length.append(len(tmp_expanded[-1])) 598 else: 599 r_length.append(len(tmp[-1])) 600 e_N = np.sum(r_length) 601 x = np.arange(e_N) 602 y_test = np.concatenate(tmp, axis=0) 603 if expand: 604 y = np.concatenate(tmp_expanded, axis=0) 605 else: 606 y = y_test 607 plt.errorbar(x, y, fmt='.', markersize=3) 608 plt.xlim(-0.5, e_N - 0.5) 609 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 610 plt.draw() 611 612 def plot_piechart(self, save=None): 613 """Plot piechart which shows the fractional contribution of each 614 ensemble to the error and returns a dictionary containing the fractions. 615 616 Parameters 617 ---------- 618 save : str 619 saves the figure to a file named 'save' if. 620 """ 621 if not hasattr(self, 'e_dvalue'): 622 raise Exception('Run the gamma method first.') 623 if np.isclose(0.0, self._dvalue, atol=1e-15): 624 raise Exception('Error is 0.0') 625 labels = self.e_names 626 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 627 fig1, ax1 = plt.subplots() 628 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 629 ax1.axis('equal') 630 plt.draw() 631 if save: 632 fig1.savefig(save) 633 634 return dict(zip(labels, sizes)) 635 636 def dump(self, filename, datatype="json.gz", description="", **kwargs): 637 """Dump the Obs to a file 'name' of chosen format. 638 639 Parameters 640 ---------- 641 filename : str 642 name of the file to be saved. 643 datatype : str 644 Format of the exported file. Supported formats include 645 "json.gz" and "pickle" 646 description : str 647 Description for output file, only relevant for json.gz format. 648 path : str 649 specifies a custom path for the file (default '.') 650 """ 651 if 'path' in kwargs: 652 file_name = kwargs.get('path') + '/' + filename 653 else: 654 file_name = filename 655 656 if datatype == "json.gz": 657 from .input.json import dump_to_json 658 dump_to_json([self], file_name, description=description) 659 elif datatype == "pickle": 660 with open(file_name + '.p', 'wb') as fb: 661 pickle.dump(self, fb) 662 else: 663 raise Exception("Unknown datatype " + str(datatype)) 664 665 def export_jackknife(self): 666 """Export jackknife samples from the Obs 667 668 Returns 669 ------- 670 numpy.ndarray 671 Returns a numpy array of length N + 1 where N is the number of samples 672 for the given ensemble and replicum. The zeroth entry of the array contains 673 the mean value of the Obs, entries 1 to N contain the N jackknife samples 674 derived from the Obs. The current implementation only works for observables 675 defined on exactly one ensemble and replicum. The derived jackknife samples 676 should agree with samples from a full jackknife analysis up to O(1/N). 677 """ 678 679 if len(self.names) != 1: 680 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 681 682 name = self.names[0] 683 full_data = self.deltas[name] + self.r_values[name] 684 n = full_data.size 685 mean = self.value 686 tmp_jacks = np.zeros(n + 1) 687 tmp_jacks[0] = mean 688 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 689 return tmp_jacks 690 691 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 692 """Export bootstrap samples from the Obs 693 694 Parameters 695 ---------- 696 samples : int 697 Number of bootstrap samples to generate. 698 random_numbers : np.ndarray 699 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 700 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 701 save_rng : str 702 Save the random numbers to a file if a path is specified. 703 704 Returns 705 ------- 706 numpy.ndarray 707 Returns a numpy array of length N + 1 where N is the number of samples 708 for the given ensemble and replicum. The zeroth entry of the array contains 709 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 710 derived from the Obs. The current implementation only works for observables 711 defined on exactly one ensemble and replicum. The derived bootstrap samples 712 should agree with samples from a full bootstrap analysis up to O(1/N). 713 """ 714 if len(self.names) != 1: 715 raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 716 717 name = self.names[0] 718 length = self.N 719 720 if random_numbers is None: 721 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 722 rng = np.random.default_rng(seed) 723 random_numbers = rng.integers(0, length, size=(samples, length)) 724 725 if save_rng is not None: 726 np.savetxt(save_rng, random_numbers, fmt='%i') 727 728 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 729 ret = np.zeros(samples + 1) 730 ret[0] = self.value 731 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 732 return ret 733 734 def __float__(self): 735 return float(self.value) 736 737 def __repr__(self): 738 return 'Obs[' + str(self) + ']' 739 740 def __str__(self): 741 return _format_uncertainty(self.value, self._dvalue) 742 743 def __format__(self, format_type): 744 if format_type == "": 745 significance = 2 746 else: 747 significance = int(float(format_type.replace("+", "").replace("-", ""))) 748 my_str = _format_uncertainty(self.value, self._dvalue, 749 significance=significance) 750 for char in ["+", " "]: 751 if format_type.startswith(char): 752 if my_str[0] != "-": 753 my_str = char + my_str 754 return my_str 755 756 def __hash__(self): 757 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 758 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 759 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 760 hash_tuple += tuple([o.encode() for o in self.names]) 761 m = hashlib.md5() 762 [m.update(o) for o in hash_tuple] 763 return int(m.hexdigest(), 16) & 0xFFFFFFFF 764 765 # Overload comparisons 766 def __lt__(self, other): 767 return self.value < other 768 769 def __le__(self, other): 770 return self.value <= other 771 772 def __gt__(self, other): 773 return self.value > other 774 775 def __ge__(self, other): 776 return self.value >= other 777 778 def __eq__(self, other): 779 if other is None: 780 return False 781 return (self - other).is_zero() 782 783 # Overload math operations 784 def __add__(self, y): 785 if isinstance(y, Obs): 786 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 787 else: 788 if isinstance(y, np.ndarray): 789 return np.array([self + o for o in y]) 790 elif isinstance(y, complex): 791 return CObs(self, 0) + y 792 elif y.__class__.__name__ in ['Corr', 'CObs']: 793 return NotImplemented 794 else: 795 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 796 797 def __radd__(self, y): 798 return self + y 799 800 def __mul__(self, y): 801 if isinstance(y, Obs): 802 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 803 else: 804 if isinstance(y, np.ndarray): 805 return np.array([self * o for o in y]) 806 elif isinstance(y, complex): 807 return CObs(self * y.real, self * y.imag) 808 elif y.__class__.__name__ in ['Corr', 'CObs']: 809 return NotImplemented 810 else: 811 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 812 813 def __rmul__(self, y): 814 return self * y 815 816 def __sub__(self, y): 817 if isinstance(y, Obs): 818 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 819 else: 820 if isinstance(y, np.ndarray): 821 return np.array([self - o for o in y]) 822 elif y.__class__.__name__ in ['Corr', 'CObs']: 823 return NotImplemented 824 else: 825 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 826 827 def __rsub__(self, y): 828 return -1 * (self - y) 829 830 def __pos__(self): 831 return self 832 833 def __neg__(self): 834 return -1 * self 835 836 def __truediv__(self, y): 837 if isinstance(y, Obs): 838 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 839 else: 840 if isinstance(y, np.ndarray): 841 return np.array([self / o for o in y]) 842 elif y.__class__.__name__ in ['Corr', 'CObs']: 843 return NotImplemented 844 else: 845 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 846 847 def __rtruediv__(self, y): 848 if isinstance(y, Obs): 849 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 850 else: 851 if isinstance(y, np.ndarray): 852 return np.array([o / self for o in y]) 853 elif y.__class__.__name__ in ['Corr', 'CObs']: 854 return NotImplemented 855 else: 856 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 857 858 def __pow__(self, y): 859 if isinstance(y, Obs): 860 return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)]) 861 else: 862 return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)]) 863 864 def __rpow__(self, y): 865 return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)]) 866 867 def __abs__(self): 868 return derived_observable(lambda x: anp.abs(x[0]), [self]) 869 870 # Overload numpy functions 871 def sqrt(self): 872 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 873 874 def log(self): 875 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 876 877 def exp(self): 878 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 879 880 def sin(self): 881 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 882 883 def cos(self): 884 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 885 886 def tan(self): 887 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 888 889 def arcsin(self): 890 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 891 892 def arccos(self): 893 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 894 895 def arctan(self): 896 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 897 898 def sinh(self): 899 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 900 901 def cosh(self): 902 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 903 904 def tanh(self): 905 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 906 907 def arcsinh(self): 908 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 909 910 def arccosh(self): 911 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 912 913 def arctanh(self): 914 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise ValueError('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise ValueError('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise ValueError('Names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 else: 87 if not isinstance(names[0], str): 88 raise TypeError('All names have to be strings.') 89 if min(len(x) for x in samples) <= 4: 90 raise ValueError('Samples have to have at least 5 entries.') 91 92 self.names = sorted(names) 93 self.shape = {} 94 self.r_values = {} 95 self.deltas = {} 96 self._covobs = {} 97 98 self._value = 0 99 self.N = 0 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]]))) 109 elif np.any(dc == 0): 110 raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]]))) 111 if len(dc) == 1: 112 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 113 else: 114 self.idl[name] = list(idx) 115 else: 116 raise TypeError('incompatible type for idl[%s].' % (name)) 117 else: 118 for name, sample in sorted(zip(names, samples)): 119 self.idl[name] = range(1, len(sample) + 1) 120 121 if kwargs.get("means") is not None: 122 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 self.r_values[name] = mean 126 self.deltas[name] = sample 127 else: 128 for name, sample in sorted(zip(names, samples)): 129 self.shape[name] = len(self.idl[name]) 130 self.N += self.shape[name] 131 if len(sample) != self.shape[name]: 132 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 133 self.r_values[name] = np.mean(sample) 134 self.deltas[name] = sample - self.r_values[name] 135 self._value += self.shape[name] * self.r_values[name] 136 self._value /= self.N 137 138 self._dvalue = 0.0 139 self.ddvalue = 0.0 140 self.reweighted = False 141 142 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimates coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True) 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 gapsize = _determine_gap(self, e_content, e_name) 244 245 r_length = [] 246 for r_name in e_content[e_name]: 247 if isinstance(self.idl[r_name], range): 248 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 249 else: 250 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 251 252 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 253 w_max = max(r_length) // 2 254 e_gamma[e_name] = np.zeros(w_max) 255 self.e_rho[e_name] = np.zeros(w_max) 256 self.e_drho[e_name] = np.zeros(w_max) 257 258 for r_name in e_content[e_name]: 259 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 260 261 gamma_div = np.zeros(w_max) 262 for r_name in e_content[e_name]: 263 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 264 gamma_div[gamma_div < 1] = 1.0 265 e_gamma[e_name] /= gamma_div[:w_max] 266 267 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 268 self.e_tauint[e_name] = 0.5 269 self.e_dtauint[e_name] = 0.0 270 self.e_dvalue[e_name] = 0.0 271 self.e_ddvalue[e_name] = 0.0 272 self.e_windowsize[e_name] = 0 273 continue 274 275 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 276 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 277 # Make sure no entry of tauint is smaller than 0.5 278 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 279 # hep-lat/0306017 eq. (42) 280 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 281 self.e_n_dtauint[e_name][0] = 0.0 282 283 def _compute_drho(i): 284 tmp = (self.e_rho[e_name][i + 1:w_max] 285 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 286 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 287 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 288 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 289 290 if self.tau_exp[e_name] > 0: 291 _compute_drho(1) 292 texp = self.tau_exp[e_name] 293 # Critical slowing down analysis 294 if w_max // 2 <= 1: 295 raise Exception("Need at least 8 samples for tau_exp error analysis") 296 for n in range(1, w_max // 2): 297 _compute_drho(n + 1) 298 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 299 # Bias correction hep-lat/0306017 eq. (49) included 300 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 301 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 302 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 303 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 305 self.e_windowsize[e_name] = n 306 break 307 else: 308 if self.S[e_name] == 0.0: 309 self.e_tauint[e_name] = 0.5 310 self.e_dtauint[e_name] = 0.0 311 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 313 self.e_windowsize[e_name] = 0 314 else: 315 # Standard automatic windowing procedure 316 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 317 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 318 for n in range(1, w_max): 319 if g_w[n - 1] < 0 or n >= w_max - 1: 320 _compute_drho(n) 321 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 322 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 323 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 324 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 325 self.e_windowsize[e_name] = n 326 break 327 328 self._dvalue += self.e_dvalue[e_name] ** 2 329 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 330 331 for e_name in self.cov_names: 332 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 333 self.e_ddvalue[e_name] = 0 334 self._dvalue += self.e_dvalue[e_name]**2 335 336 self._dvalue = np.sqrt(self._dvalue) 337 if self._dvalue == 0.0: 338 self.ddvalue = 0.0 339 else: 340 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 341 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
177 def gamma_method(self, **kwargs): 178 """Estimate the error and related properties of the Obs. 179 180 Parameters 181 ---------- 182 S : float 183 specifies a custom value for the parameter S (default 2.0). 184 If set to 0 it is assumed that the data exhibits no 185 autocorrelation. In this case the error estimates coincides 186 with the sample standard error. 187 tau_exp : float 188 positive value triggers the critical slowing down analysis 189 (default 0.0). 190 N_sigma : float 191 number of standard deviations from zero until the tail is 192 attached to the autocorrelation function (default 1). 193 fft : bool 194 determines whether the fft algorithm is used for the computation 195 of the autocorrelation function (default True) 196 """ 197 198 e_content = self.e_content 199 self.e_dvalue = {} 200 self.e_ddvalue = {} 201 self.e_tauint = {} 202 self.e_dtauint = {} 203 self.e_windowsize = {} 204 self.e_n_tauint = {} 205 self.e_n_dtauint = {} 206 e_gamma = {} 207 self.e_rho = {} 208 self.e_drho = {} 209 self._dvalue = 0 210 self.ddvalue = 0 211 212 self.S = {} 213 self.tau_exp = {} 214 self.N_sigma = {} 215 216 if kwargs.get('fft') is False: 217 fft = False 218 else: 219 fft = True 220 221 def _parse_kwarg(kwarg_name): 222 if kwarg_name in kwargs: 223 tmp = kwargs.get(kwarg_name) 224 if isinstance(tmp, (int, float)): 225 if tmp < 0: 226 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 227 for e, e_name in enumerate(self.e_names): 228 getattr(self, kwarg_name)[e_name] = tmp 229 else: 230 raise TypeError(kwarg_name + ' is not in proper format.') 231 else: 232 for e, e_name in enumerate(self.e_names): 233 if e_name in getattr(Obs, kwarg_name + '_dict'): 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 235 else: 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 237 238 _parse_kwarg('S') 239 _parse_kwarg('tau_exp') 240 _parse_kwarg('N_sigma') 241 242 for e, e_name in enumerate(self.mc_names): 243 gapsize = _determine_gap(self, e_content, e_name) 244 245 r_length = [] 246 for r_name in e_content[e_name]: 247 if isinstance(self.idl[r_name], range): 248 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 249 else: 250 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 251 252 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 253 w_max = max(r_length) // 2 254 e_gamma[e_name] = np.zeros(w_max) 255 self.e_rho[e_name] = np.zeros(w_max) 256 self.e_drho[e_name] = np.zeros(w_max) 257 258 for r_name in e_content[e_name]: 259 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 260 261 gamma_div = np.zeros(w_max) 262 for r_name in e_content[e_name]: 263 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 264 gamma_div[gamma_div < 1] = 1.0 265 e_gamma[e_name] /= gamma_div[:w_max] 266 267 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 268 self.e_tauint[e_name] = 0.5 269 self.e_dtauint[e_name] = 0.0 270 self.e_dvalue[e_name] = 0.0 271 self.e_ddvalue[e_name] = 0.0 272 self.e_windowsize[e_name] = 0 273 continue 274 275 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 276 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 277 # Make sure no entry of tauint is smaller than 0.5 278 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 279 # hep-lat/0306017 eq. (42) 280 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 281 self.e_n_dtauint[e_name][0] = 0.0 282 283 def _compute_drho(i): 284 tmp = (self.e_rho[e_name][i + 1:w_max] 285 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 286 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 287 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 288 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 289 290 if self.tau_exp[e_name] > 0: 291 _compute_drho(1) 292 texp = self.tau_exp[e_name] 293 # Critical slowing down analysis 294 if w_max // 2 <= 1: 295 raise Exception("Need at least 8 samples for tau_exp error analysis") 296 for n in range(1, w_max // 2): 297 _compute_drho(n + 1) 298 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 299 # Bias correction hep-lat/0306017 eq. (49) included 300 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 301 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 302 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 303 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 305 self.e_windowsize[e_name] = n 306 break 307 else: 308 if self.S[e_name] == 0.0: 309 self.e_tauint[e_name] = 0.5 310 self.e_dtauint[e_name] = 0.0 311 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 312 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 313 self.e_windowsize[e_name] = 0 314 else: 315 # Standard automatic windowing procedure 316 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 317 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 318 for n in range(1, w_max): 319 if g_w[n - 1] < 0 or n >= w_max - 1: 320 _compute_drho(n) 321 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 322 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 323 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 324 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 325 self.e_windowsize[e_name] = n 326 break 327 328 self._dvalue += self.e_dvalue[e_name] ** 2 329 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 330 331 for e_name in self.cov_names: 332 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 333 self.e_ddvalue[e_name] = 0 334 self._dvalue += self.e_dvalue[e_name]**2 335 336 self._dvalue = np.sqrt(self._dvalue) 337 if self._dvalue == 0.0: 338 self.ddvalue = 0.0 339 else: 340 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 341 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
381 def details(self, ens_content=True): 382 """Output detailed properties of the Obs. 383 384 Parameters 385 ---------- 386 ens_content : bool 387 print details about the ensembles and replica if true. 388 """ 389 if self.tag is not None: 390 print("Description:", self.tag) 391 if not hasattr(self, 'e_dvalue'): 392 print('Result\t %3.8e' % (self.value)) 393 else: 394 if self.value == 0.0: 395 percentage = np.nan 396 else: 397 percentage = np.abs(self._dvalue / self.value) * 100 398 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 399 if len(self.e_names) > 1: 400 print(' Ensemble errors:') 401 e_content = self.e_content 402 for e_name in self.mc_names: 403 gap = _determine_gap(self, e_content, e_name) 404 405 if len(self.e_names) > 1: 406 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 407 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 408 tau_string += f" in units of {gap} config" 409 if gap > 1: 410 tau_string += "s" 411 if self.tau_exp[e_name] > 0: 412 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 413 else: 414 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 415 print(tau_string) 416 for e_name in self.cov_names: 417 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 418 if ens_content is True: 419 if len(self.e_names) == 1: 420 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 421 else: 422 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 423 my_string_list = [] 424 for key, value in sorted(self.e_content.items()): 425 if key not in self.covobs: 426 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 427 if len(value) == 1: 428 my_string += f': {self.shape[value[0]]} configurations' 429 if isinstance(self.idl[value[0]], range): 430 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 431 else: 432 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 433 else: 434 sublist = [] 435 for v in value: 436 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 437 my_substring += f': {self.shape[v]} configurations' 438 if isinstance(self.idl[v], range): 439 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 440 else: 441 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 442 sublist.append(my_substring) 443 444 my_string += '\n' + '\n'.join(sublist) 445 else: 446 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 447 my_string_list.append(my_string) 448 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
450 def reweight(self, weight): 451 """Reweight the obs with given rewighting factors. 452 453 Parameters 454 ---------- 455 weight : Obs 456 Reweighting factor. An Observable that has to be defined on a superset of the 457 configurations in obs[i].idl for all i. 458 all_configs : bool 459 if True, the reweighted observables are normalized by the average of 460 the reweighting factor on all configurations in weight.idl and not 461 on the configurations in obs[i].idl. Default False. 462 """ 463 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
465 def is_zero_within_error(self, sigma=1): 466 """Checks whether the observable is zero within 'sigma' standard errors. 467 468 Parameters 469 ---------- 470 sigma : int 471 Number of standard errors used for the check. 472 473 Works only properly when the gamma method was run. 474 """ 475 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
477 def is_zero(self, atol=1e-10): 478 """Checks whether the observable is zero within a given tolerance. 479 480 Parameters 481 ---------- 482 atol : float 483 Absolute tolerance (for details see numpy documentation). 484 """ 485 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
487 def plot_tauint(self, save=None): 488 """Plot integrated autocorrelation time for each ensemble. 489 490 Parameters 491 ---------- 492 save : str 493 saves the figure to a file named 'save' if. 494 """ 495 if not hasattr(self, 'e_dvalue'): 496 raise Exception('Run the gamma method first.') 497 498 for e, e_name in enumerate(self.mc_names): 499 fig = plt.figure() 500 plt.xlabel(r'$W$') 501 plt.ylabel(r'$\tau_\mathrm{int}$') 502 length = int(len(self.e_n_tauint[e_name])) 503 if self.tau_exp[e_name] > 0: 504 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 505 x_help = np.arange(2 * self.tau_exp[e_name]) 506 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 507 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 508 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 509 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 510 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 511 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 512 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 513 else: 514 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 515 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 516 517 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 518 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 519 plt.legend() 520 plt.xlim(-0.5, xmax) 521 ylim = plt.ylim() 522 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 523 plt.draw() 524 if save: 525 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
527 def plot_rho(self, save=None): 528 """Plot normalized autocorrelation function time for each ensemble. 529 530 Parameters 531 ---------- 532 save : str 533 saves the figure to a file named 'save' if. 534 """ 535 if not hasattr(self, 'e_dvalue'): 536 raise Exception('Run the gamma method first.') 537 for e, e_name in enumerate(self.mc_names): 538 fig = plt.figure() 539 plt.xlabel('W') 540 plt.ylabel('rho') 541 length = int(len(self.e_drho[e_name])) 542 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 543 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 544 if self.tau_exp[e_name] > 0: 545 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 546 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 547 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 548 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 549 else: 550 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 551 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 552 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 553 plt.xlim(-0.5, xmax) 554 plt.draw() 555 if save: 556 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
558 def plot_rep_dist(self): 559 """Plot replica distribution for each ensemble with more than one replicum.""" 560 if not hasattr(self, 'e_dvalue'): 561 raise Exception('Run the gamma method first.') 562 for e, e_name in enumerate(self.mc_names): 563 if len(self.e_content[e_name]) == 1: 564 print('No replica distribution for a single replicum (', e_name, ')') 565 continue 566 r_length = [] 567 sub_r_mean = 0 568 for r, r_name in enumerate(self.e_content[e_name]): 569 r_length.append(len(self.deltas[r_name])) 570 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 571 e_N = np.sum(r_length) 572 sub_r_mean /= e_N 573 arr = np.zeros(len(self.e_content[e_name])) 574 for r, r_name in enumerate(self.e_content[e_name]): 575 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 576 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 577 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 578 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
580 def plot_history(self, expand=True): 581 """Plot derived Monte Carlo history for each ensemble 582 583 Parameters 584 ---------- 585 expand : bool 586 show expanded history for irregular Monte Carlo chains (default: True). 587 """ 588 for e, e_name in enumerate(self.mc_names): 589 plt.figure() 590 r_length = [] 591 tmp = [] 592 tmp_expanded = [] 593 for r, r_name in enumerate(self.e_content[e_name]): 594 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 595 if expand: 596 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 597 r_length.append(len(tmp_expanded[-1])) 598 else: 599 r_length.append(len(tmp[-1])) 600 e_N = np.sum(r_length) 601 x = np.arange(e_N) 602 y_test = np.concatenate(tmp, axis=0) 603 if expand: 604 y = np.concatenate(tmp_expanded, axis=0) 605 else: 606 y = y_test 607 plt.errorbar(x, y, fmt='.', markersize=3) 608 plt.xlim(-0.5, e_N - 0.5) 609 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 610 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
612 def plot_piechart(self, save=None): 613 """Plot piechart which shows the fractional contribution of each 614 ensemble to the error and returns a dictionary containing the fractions. 615 616 Parameters 617 ---------- 618 save : str 619 saves the figure to a file named 'save' if. 620 """ 621 if not hasattr(self, 'e_dvalue'): 622 raise Exception('Run the gamma method first.') 623 if np.isclose(0.0, self._dvalue, atol=1e-15): 624 raise Exception('Error is 0.0') 625 labels = self.e_names 626 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 627 fig1, ax1 = plt.subplots() 628 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 629 ax1.axis('equal') 630 plt.draw() 631 if save: 632 fig1.savefig(save) 633 634 return dict(zip(labels, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
636 def dump(self, filename, datatype="json.gz", description="", **kwargs): 637 """Dump the Obs to a file 'name' of chosen format. 638 639 Parameters 640 ---------- 641 filename : str 642 name of the file to be saved. 643 datatype : str 644 Format of the exported file. Supported formats include 645 "json.gz" and "pickle" 646 description : str 647 Description for output file, only relevant for json.gz format. 648 path : str 649 specifies a custom path for the file (default '.') 650 """ 651 if 'path' in kwargs: 652 file_name = kwargs.get('path') + '/' + filename 653 else: 654 file_name = filename 655 656 if datatype == "json.gz": 657 from .input.json import dump_to_json 658 dump_to_json([self], file_name, description=description) 659 elif datatype == "pickle": 660 with open(file_name + '.p', 'wb') as fb: 661 pickle.dump(self, fb) 662 else: 663 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
665 def export_jackknife(self): 666 """Export jackknife samples from the Obs 667 668 Returns 669 ------- 670 numpy.ndarray 671 Returns a numpy array of length N + 1 where N is the number of samples 672 for the given ensemble and replicum. The zeroth entry of the array contains 673 the mean value of the Obs, entries 1 to N contain the N jackknife samples 674 derived from the Obs. The current implementation only works for observables 675 defined on exactly one ensemble and replicum. The derived jackknife samples 676 should agree with samples from a full jackknife analysis up to O(1/N). 677 """ 678 679 if len(self.names) != 1: 680 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 681 682 name = self.names[0] 683 full_data = self.deltas[name] + self.r_values[name] 684 n = full_data.size 685 mean = self.value 686 tmp_jacks = np.zeros(n + 1) 687 tmp_jacks[0] = mean 688 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 689 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
691 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 692 """Export bootstrap samples from the Obs 693 694 Parameters 695 ---------- 696 samples : int 697 Number of bootstrap samples to generate. 698 random_numbers : np.ndarray 699 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 700 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 701 save_rng : str 702 Save the random numbers to a file if a path is specified. 703 704 Returns 705 ------- 706 numpy.ndarray 707 Returns a numpy array of length N + 1 where N is the number of samples 708 for the given ensemble and replicum. The zeroth entry of the array contains 709 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 710 derived from the Obs. The current implementation only works for observables 711 defined on exactly one ensemble and replicum. The derived bootstrap samples 712 should agree with samples from a full bootstrap analysis up to O(1/N). 713 """ 714 if len(self.names) != 1: 715 raise Exception("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 716 717 name = self.names[0] 718 length = self.N 719 720 if random_numbers is None: 721 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 722 rng = np.random.default_rng(seed) 723 random_numbers = rng.integers(0, length, size=(samples, length)) 724 725 if save_rng is not None: 726 np.savetxt(save_rng, random_numbers, fmt='%i') 727 728 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 729 ret = np.zeros(samples + 1) 730 ret[0] = self.value 731 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 732 return ret
Export bootstrap samples from the Obs
Parameters
- samples (int): Number of bootstrap samples to generate.
- random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name.
- save_rng (str): Save the random numbers to a file if a path is specified.
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
917class CObs: 918 """Class for a complex valued observable.""" 919 __slots__ = ['_real', '_imag', 'tag'] 920 921 def __init__(self, real, imag=0.0): 922 self._real = real 923 self._imag = imag 924 self.tag = None 925 926 @property 927 def real(self): 928 return self._real 929 930 @property 931 def imag(self): 932 return self._imag 933 934 def gamma_method(self, **kwargs): 935 """Executes the gamma_method for the real and the imaginary part.""" 936 if isinstance(self.real, Obs): 937 self.real.gamma_method(**kwargs) 938 if isinstance(self.imag, Obs): 939 self.imag.gamma_method(**kwargs) 940 941 def is_zero(self): 942 """Checks whether both real and imaginary part are zero within machine precision.""" 943 return self.real == 0.0 and self.imag == 0.0 944 945 def conjugate(self): 946 return CObs(self.real, -self.imag) 947 948 def __add__(self, other): 949 if isinstance(other, np.ndarray): 950 return other + self 951 elif hasattr(other, 'real') and hasattr(other, 'imag'): 952 return CObs(self.real + other.real, 953 self.imag + other.imag) 954 else: 955 return CObs(self.real + other, self.imag) 956 957 def __radd__(self, y): 958 return self + y 959 960 def __sub__(self, other): 961 if isinstance(other, np.ndarray): 962 return -1 * (other - self) 963 elif hasattr(other, 'real') and hasattr(other, 'imag'): 964 return CObs(self.real - other.real, self.imag - other.imag) 965 else: 966 return CObs(self.real - other, self.imag) 967 968 def __rsub__(self, other): 969 return -1 * (self - other) 970 971 def __mul__(self, other): 972 if isinstance(other, np.ndarray): 973 return other * self 974 elif hasattr(other, 'real') and hasattr(other, 'imag'): 975 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 976 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 977 [self.real, other.real, self.imag, other.imag], 978 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 979 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 980 [self.real, other.real, self.imag, other.imag], 981 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 982 elif getattr(other, 'imag', 0) != 0: 983 return CObs(self.real * other.real - self.imag * other.imag, 984 self.imag * other.real + self.real * other.imag) 985 else: 986 return CObs(self.real * other.real, self.imag * other.real) 987 else: 988 return CObs(self.real * other, self.imag * other) 989 990 def __rmul__(self, other): 991 return self * other 992 993 def __truediv__(self, other): 994 if isinstance(other, np.ndarray): 995 return 1 / (other / self) 996 elif hasattr(other, 'real') and hasattr(other, 'imag'): 997 r = other.real ** 2 + other.imag ** 2 998 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 999 else: 1000 return CObs(self.real / other, self.imag / other) 1001 1002 def __rtruediv__(self, other): 1003 r = self.real ** 2 + self.imag ** 2 1004 if hasattr(other, 'real') and hasattr(other, 'imag'): 1005 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 1006 else: 1007 return CObs(self.real * other / r, -self.imag * other / r) 1008 1009 def __abs__(self): 1010 return np.sqrt(self.real**2 + self.imag**2) 1011 1012 def __pos__(self): 1013 return self 1014 1015 def __neg__(self): 1016 return -1 * self 1017 1018 def __eq__(self, other): 1019 return self.real == other.real and self.imag == other.imag 1020 1021 def __str__(self): 1022 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 1023 1024 def __repr__(self): 1025 return 'CObs[' + str(self) + ']' 1026 1027 def __format__(self, format_type): 1028 if format_type == "": 1029 significance = 2 1030 format_type = "2" 1031 else: 1032 significance = int(float(format_type.replace("+", "").replace("-", ""))) 1033 return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
Class for a complex valued observable.
934 def gamma_method(self, **kwargs): 935 """Executes the gamma_method for the real and the imaginary part.""" 936 if isinstance(self.real, Obs): 937 self.real.gamma_method(**kwargs) 938 if isinstance(self.imag, Obs): 939 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1036def gamma_method(x, **kwargs): 1037 """Vectorized version of the gamma_method applicable to lists or arrays of Obs. 1038 1039 See docstring of pe.Obs.gamma_method for details. 1040 """ 1041 return np.vectorize(lambda o: o.gm(**kwargs))(x)
Vectorized version of the gamma_method applicable to lists or arrays of Obs.
See docstring of pe.Obs.gamma_method for details.
1036def gamma_method(x, **kwargs): 1037 """Vectorized version of the gamma_method applicable to lists or arrays of Obs. 1038 1039 See docstring of pe.Obs.gamma_method for details. 1040 """ 1041 return np.vectorize(lambda o: o.gm(**kwargs))(x)
Vectorized version of the gamma_method applicable to lists or arrays of Obs.
See docstring of pe.Obs.gamma_method for details.
1171def derived_observable(func, data, array_mode=False, **kwargs): 1172 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1173 1174 Parameters 1175 ---------- 1176 func : object 1177 arbitrary function of the form func(data, **kwargs). For the 1178 automatic differentiation to work, all numpy functions have to have 1179 the autograd wrapper (use 'import autograd.numpy as anp'). 1180 data : list 1181 list of Obs, e.g. [obs1, obs2, obs3]. 1182 num_grad : bool 1183 if True, numerical derivatives are used instead of autograd 1184 (default False). To control the numerical differentiation the 1185 kwargs of numdifftools.step_generators.MaxStepGenerator 1186 can be used. 1187 man_grad : list 1188 manually supply a list or an array which contains the jacobian 1189 of func. Use cautiously, supplying the wrong derivative will 1190 not be intercepted. 1191 1192 Notes 1193 ----- 1194 For simple mathematical operations it can be practical to use anonymous 1195 functions. For the ratio of two observables one can e.g. use 1196 1197 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1198 """ 1199 1200 data = np.asarray(data) 1201 raveled_data = data.ravel() 1202 1203 # Workaround for matrix operations containing non Obs data 1204 if not all(isinstance(x, Obs) for x in raveled_data): 1205 for i in range(len(raveled_data)): 1206 if isinstance(raveled_data[i], (int, float)): 1207 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1208 1209 allcov = {} 1210 for o in raveled_data: 1211 for name in o.cov_names: 1212 if name in allcov: 1213 if not np.allclose(allcov[name], o.covobs[name].cov): 1214 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1215 else: 1216 allcov[name] = o.covobs[name].cov 1217 1218 n_obs = len(raveled_data) 1219 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1220 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1221 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1222 1223 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1224 1225 if data.ndim == 1: 1226 values = np.array([o.value for o in data]) 1227 else: 1228 values = np.vectorize(lambda x: x.value)(data) 1229 1230 new_values = func(values, **kwargs) 1231 1232 multi = int(isinstance(new_values, np.ndarray)) 1233 1234 new_r_values = {} 1235 new_idl_d = {} 1236 for name in new_sample_names: 1237 idl = [] 1238 tmp_values = np.zeros(n_obs) 1239 for i, item in enumerate(raveled_data): 1240 tmp_values[i] = item.r_values.get(name, item.value) 1241 tmp_idl = item.idl.get(name) 1242 if tmp_idl is not None: 1243 idl.append(tmp_idl) 1244 if multi > 0: 1245 tmp_values = np.array(tmp_values).reshape(data.shape) 1246 new_r_values[name] = func(tmp_values, **kwargs) 1247 new_idl_d[name] = _merge_idx(idl) 1248 1249 def _compute_scalefactor_missing_rep(obs): 1250 """ 1251 Computes the scale factor that is to be multiplied with the deltas 1252 in the case where Obs with different subsets of replica are merged. 1253 Returns a dictionary with the scale factor for each Monte Carlo name. 1254 1255 Parameters 1256 ---------- 1257 obs : Obs 1258 The observable corresponding to the deltas that are to be scaled 1259 """ 1260 scalef_d = {} 1261 for mc_name in obs.mc_names: 1262 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] 1263 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] 1264 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): 1265 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) 1266 return scalef_d 1267 1268 if 'man_grad' in kwargs: 1269 deriv = np.asarray(kwargs.get('man_grad')) 1270 if new_values.shape + data.shape != deriv.shape: 1271 raise Exception('Manual derivative does not have correct shape.') 1272 elif kwargs.get('num_grad') is True: 1273 if multi > 0: 1274 raise Exception('Multi mode currently not supported for numerical derivative') 1275 options = { 1276 'base_step': 0.1, 1277 'step_ratio': 2.5} 1278 for key in options.keys(): 1279 kwarg = kwargs.get(key) 1280 if kwarg is not None: 1281 options[key] = kwarg 1282 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1283 if tmp_df.size == 1: 1284 deriv = np.array([tmp_df.real]) 1285 else: 1286 deriv = tmp_df.real 1287 else: 1288 deriv = jacobian(func)(values, **kwargs) 1289 1290 final_result = np.zeros(new_values.shape, dtype=object) 1291 1292 if array_mode is True: 1293 1294 class _Zero_grad(): 1295 def __init__(self, N): 1296 self.grad = np.zeros((N, 1)) 1297 1298 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1299 d_extracted = {} 1300 g_extracted = {} 1301 for name in new_sample_names: 1302 d_extracted[name] = [] 1303 ens_length = len(new_idl_d[name]) 1304 for i_dat, dat in enumerate(data): 1305 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1306 for name in new_cov_names: 1307 g_extracted[name] = [] 1308 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1309 for i_dat, dat in enumerate(data): 1310 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1311 1312 for i_val, new_val in np.ndenumerate(new_values): 1313 new_deltas = {} 1314 new_grad = {} 1315 if array_mode is True: 1316 for name in new_sample_names: 1317 ens_length = d_extracted[name][0].shape[-1] 1318 new_deltas[name] = np.zeros(ens_length) 1319 for i_dat, dat in enumerate(d_extracted[name]): 1320 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1321 for name in new_cov_names: 1322 new_grad[name] = 0 1323 for i_dat, dat in enumerate(g_extracted[name]): 1324 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1325 else: 1326 for j_obs, obs in np.ndenumerate(data): 1327 scalef_d = _compute_scalefactor_missing_rep(obs) 1328 for name in obs.names: 1329 if name in obs.cov_names: 1330 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1331 else: 1332 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) 1333 1334 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1335 1336 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1337 raise Exception('The same name has been used for deltas and covobs!') 1338 new_samples = [] 1339 new_means = [] 1340 new_idl = [] 1341 new_names_obs = [] 1342 for name in new_names: 1343 if name not in new_covobs: 1344 new_samples.append(new_deltas[name]) 1345 new_idl.append(new_idl_d[name]) 1346 new_means.append(new_r_values[name][i_val]) 1347 new_names_obs.append(name) 1348 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1349 for name in new_covobs: 1350 final_result[i_val].names.append(name) 1351 final_result[i_val]._covobs = new_covobs 1352 final_result[i_val]._value = new_val 1353 final_result[i_val].reweighted = reweighted 1354 1355 if multi == 0: 1356 final_result = final_result.item() 1357 1358 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1390def reweight(weight, obs, **kwargs): 1391 """Reweight a list of observables. 1392 1393 Parameters 1394 ---------- 1395 weight : Obs 1396 Reweighting factor. An Observable that has to be defined on a superset of the 1397 configurations in obs[i].idl for all i. 1398 obs : list 1399 list of Obs, e.g. [obs1, obs2, obs3]. 1400 all_configs : bool 1401 if True, the reweighted observables are normalized by the average of 1402 the reweighting factor on all configurations in weight.idl and not 1403 on the configurations in obs[i].idl. Default False. 1404 """ 1405 result = [] 1406 for i in range(len(obs)): 1407 if len(obs[i].cov_names): 1408 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1409 if not set(obs[i].names).issubset(weight.names): 1410 raise Exception('Error: Ensembles do not fit') 1411 for name in obs[i].names: 1412 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1413 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1414 new_samples = [] 1415 w_deltas = {} 1416 for name in sorted(obs[i].names): 1417 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1418 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1419 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1420 1421 if kwargs.get('all_configs'): 1422 new_weight = weight 1423 else: 1424 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1425 1426 result.append(tmp_obs / new_weight) 1427 result[-1].reweighted = True 1428 1429 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1432def correlate(obs_a, obs_b): 1433 """Correlate two observables. 1434 1435 Parameters 1436 ---------- 1437 obs_a : Obs 1438 First observable 1439 obs_b : Obs 1440 Second observable 1441 1442 Notes 1443 ----- 1444 Keep in mind to only correlate primary observables which have not been reweighted 1445 yet. The reweighting has to be applied after correlating the observables. 1446 Currently only works if ensembles are identical (this is not strictly necessary). 1447 """ 1448 1449 if sorted(obs_a.names) != sorted(obs_b.names): 1450 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1451 if len(obs_a.cov_names) or len(obs_b.cov_names): 1452 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1453 for name in obs_a.names: 1454 if obs_a.shape[name] != obs_b.shape[name]: 1455 raise Exception('Shapes of ensemble', name, 'do not fit') 1456 if obs_a.idl[name] != obs_b.idl[name]: 1457 raise Exception('idl of ensemble', name, 'do not fit') 1458 1459 if obs_a.reweighted is True: 1460 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1461 if obs_b.reweighted is True: 1462 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1463 1464 new_samples = [] 1465 new_idl = [] 1466 for name in sorted(obs_a.names): 1467 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1468 new_idl.append(obs_a.idl[name]) 1469 1470 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1471 o.reweighted = obs_a.reweighted or obs_b.reweighted 1472 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1475def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1476 r'''Calculates the error covariance matrix of a set of observables. 1477 1478 WARNING: This function should be used with care, especially for observables with support on multiple 1479 ensembles with differing autocorrelations. See the notes below for details. 1480 1481 The gamma method has to be applied first to all observables. 1482 1483 Parameters 1484 ---------- 1485 obs : list or numpy.ndarray 1486 List or one dimensional array of Obs 1487 visualize : bool 1488 If True plots the corresponding normalized correlation matrix (default False). 1489 correlation : bool 1490 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1491 smooth : None or int 1492 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1493 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1494 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1495 small ones. 1496 1497 Notes 1498 ----- 1499 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1500 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1501 in the absence of autocorrelation. 1502 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1503 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1504 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1505 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1506 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1507 ''' 1508 1509 length = len(obs) 1510 1511 max_samples = np.max([o.N for o in obs]) 1512 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1513 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1514 1515 cov = np.zeros((length, length)) 1516 for i in range(length): 1517 for j in range(i, length): 1518 cov[i, j] = _covariance_element(obs[i], obs[j]) 1519 cov = cov + cov.T - np.diag(np.diag(cov)) 1520 1521 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1522 1523 if isinstance(smooth, int): 1524 corr = _smooth_eigenvalues(corr, smooth) 1525 1526 if visualize: 1527 plt.matshow(corr, vmin=-1, vmax=1) 1528 plt.set_cmap('RdBu') 1529 plt.colorbar() 1530 plt.draw() 1531 1532 if correlation is True: 1533 return corr 1534 1535 errors = [o.dvalue for o in obs] 1536 cov = np.diag(errors) @ corr @ np.diag(errors) 1537 1538 eigenvalues = np.linalg.eigh(cov)[0] 1539 if not np.all(eigenvalues >= 0): 1540 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1541 1542 return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1545def invert_corr_cov_cholesky(corr, inverrdiag): 1546 """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr` 1547 and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`. 1548 1549 Parameters 1550 ---------- 1551 corr : np.ndarray 1552 correlation matrix 1553 inverrdiag : np.ndarray 1554 diagonal matrix, the entries are the inverse errors of the data points considered 1555 """ 1556 1557 condn = np.linalg.cond(corr) 1558 if condn > 0.1 / np.finfo(float).eps: 1559 raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})") 1560 if condn > 1e13: 1561 warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning) 1562 chol = np.linalg.cholesky(corr) 1563 chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True) 1564 1565 return chol_inv
Constructs a lower triangular matrix chol
via the Cholesky decomposition of the correlation matrix corr
and then returns the inverse covariance matrix chol_inv
as a lower triangular matrix by solving chol * x = inverrdiag
.
Parameters
- corr (np.ndarray): correlation matrix
- inverrdiag (np.ndarray): diagonal matrix, the entries are the inverse errors of the data points considered
1568def sort_corr(corr, kl, yd): 1569 """ Reorders a correlation matrix to match the alphabetical order of its underlying y data. 1570 1571 The ordering of the input correlation matrix `corr` is given by the list of keys `kl`. 1572 The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data 1573 that the correlation matrix is based on. 1574 This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr` 1575 according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds 1576 to the y data `yd` when arranged in an alphabetical order by its keys. 1577 1578 Parameters 1579 ---------- 1580 corr : np.ndarray 1581 A square correlation matrix constructed using the order of the y data specified by `kl`. 1582 The dimensions of `corr` should match the total number of y data points in `yd` combined. 1583 kl : list of str 1584 A list of keys that denotes the order in which the y data from `yd` was used to build the 1585 input correlation matrix `corr`. 1586 yd : dict of list 1587 A dictionary where each key corresponds to a unique identifier, and its value is a list of 1588 y data points. The total number of y data points across all keys must match the dimensions 1589 of `corr`. The lists in the dictionary can be lists of Obs. 1590 1591 Returns 1592 ------- 1593 np.ndarray 1594 A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys. 1595 1596 Example 1597 ------- 1598 >>> import numpy as np 1599 >>> import pyerrors as pe 1600 >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]]) 1601 >>> kl = ['b', 'a'] 1602 >>> yd = {'a': [1, 2], 'b': [3]} 1603 >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd) 1604 >>> print(sorted_corr) 1605 array([[1. , 0.3, 0.4], 1606 [0.3, 1. , 0.2], 1607 [0.4, 0.2, 1. ]]) 1608 1609 """ 1610 kl_sorted = sorted(kl) 1611 1612 posd = {} 1613 ofs = 0 1614 for ki, k in enumerate(kl): 1615 posd[k] = [i + ofs for i in range(len(yd[k]))] 1616 ofs += len(posd[k]) 1617 1618 mapping = [] 1619 for k in kl_sorted: 1620 for i in range(len(yd[k])): 1621 mapping.append(posd[k][i]) 1622 1623 corr_sorted = np.zeros_like(corr) 1624 for i in range(corr.shape[0]): 1625 for j in range(corr.shape[0]): 1626 corr_sorted[i][j] = corr[mapping[i]][mapping[j]] 1627 1628 return corr_sorted
Reorders a correlation matrix to match the alphabetical order of its underlying y data.
The ordering of the input correlation matrix corr
is given by the list of keys kl
.
The input dictionary yd
(with the same keys kl
) must contain the corresponding y data
that the correlation matrix is based on.
This function sorts the list of keys kl
alphabetically and sorts the matrix corr
according to this alphabetical order such that the sorted matrix corr_sorted
corresponds
to the y data yd
when arranged in an alphabetical order by its keys.
Parameters
- corr (np.ndarray):
A square correlation matrix constructed using the order of the y data specified by
kl
. The dimensions ofcorr
should match the total number of y data points inyd
combined. - kl (list of str):
A list of keys that denotes the order in which the y data from
yd
was used to build the input correlation matrixcorr
. - yd (dict of list):
A dictionary where each key corresponds to a unique identifier, and its value is a list of
y data points. The total number of y data points across all keys must match the dimensions
of
corr
. The lists in the dictionary can be lists of Obs.
Returns
- np.ndarray: A new, sorted correlation matrix that corresponds to the y data from
yd
when arranged alphabetically by its keys.
Example
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
[0.3, 1. , 0.2],
[0.4, 0.2, 1. ]])
1708def import_jackknife(jacks, name, idl=None): 1709 """Imports jackknife samples and returns an Obs 1710 1711 Parameters 1712 ---------- 1713 jacks : numpy.ndarray 1714 numpy array containing the mean value as zeroth entry and 1715 the N jackknife samples as first to Nth entry. 1716 name : str 1717 name of the ensemble the samples are defined on. 1718 """ 1719 length = len(jacks) - 1 1720 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1721 samples = jacks[1:] @ prj 1722 mean = np.mean(samples) 1723 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1724 new_obs._value = jacks[0] 1725 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1728def import_bootstrap(boots, name, random_numbers): 1729 """Imports bootstrap samples and returns an Obs 1730 1731 Parameters 1732 ---------- 1733 boots : numpy.ndarray 1734 numpy array containing the mean value as zeroth entry and 1735 the N bootstrap samples as first to Nth entry. 1736 name : str 1737 name of the ensemble the samples are defined on. 1738 random_numbers : np.ndarray 1739 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, 1740 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo 1741 chain to be reconstructed. 1742 """ 1743 samples, length = random_numbers.shape 1744 if samples != len(boots) - 1: 1745 raise ValueError("Random numbers do not have the correct shape.") 1746 1747 if samples < length: 1748 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1749 1750 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1751 1752 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1753 ret = Obs([samples], [name]) 1754 ret._value = boots[0] 1755 return ret
Imports bootstrap samples and returns an Obs
Parameters
- boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
- random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
1758def merge_obs(list_of_obs): 1759 """Combine all observables in list_of_obs into one new observable 1760 1761 Parameters 1762 ---------- 1763 list_of_obs : list 1764 list of the Obs object to be combined 1765 1766 Notes 1767 ----- 1768 It is not possible to combine obs which are based on the same replicum 1769 """ 1770 replist = [item for obs in list_of_obs for item in obs.names] 1771 if (len(replist) == len(set(replist))) is False: 1772 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1773 if any([len(o.cov_names) for o in list_of_obs]): 1774 raise Exception('Not possible to merge data that contains covobs!') 1775 new_dict = {} 1776 idl_dict = {} 1777 for o in list_of_obs: 1778 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1779 for key in set(o.deltas) | set(o.r_values)}) 1780 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1781 1782 names = sorted(new_dict.keys()) 1783 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1784 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1785 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1788def cov_Obs(means, cov, name, grad=None): 1789 """Create an Obs based on mean(s) and a covariance matrix 1790 1791 Parameters 1792 ---------- 1793 mean : list of floats or float 1794 N mean value(s) of the new Obs 1795 cov : list or array 1796 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1797 name : str 1798 identifier for the covariance matrix 1799 grad : list or array 1800 Gradient of the Covobs wrt. the means belonging to cov. 1801 """ 1802 1803 def covobs_to_obs(co): 1804 """Make an Obs out of a Covobs 1805 1806 Parameters 1807 ---------- 1808 co : Covobs 1809 Covobs to be embedded into the Obs 1810 """ 1811 o = Obs([], [], means=[]) 1812 o._value = co.value 1813 o.names.append(co.name) 1814 o._covobs[co.name] = co 1815 o._dvalue = np.sqrt(co.errsq()) 1816 return o 1817 1818 ol = [] 1819 if isinstance(means, (float, int)): 1820 means = [means] 1821 1822 for i in range(len(means)): 1823 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1824 if ol[0].covobs[name].N != len(means): 1825 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1826 if len(ol) == 1: 1827 return ol[0] 1828 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.