pyerrors.obs
import warnings
import hashlib
import pickle
import numpy as np
import autograd.numpy as anp  # Thinly-wrapped numpy
import scipy
from autograd import jacobian
import matplotlib.pyplot as plt
from scipy.stats import skew, skewtest, kurtosis, kurtosistest
import numdifftools as nd
from itertools import groupby
from .covobs import Covobs

# Improve print output of numpy.ndarrays containing Obs objects.
np.set_printoptions(formatter={'object': lambda x: str(x)})


class Obs:
    """Class for a general observable.

    Instances of Obs are the basic objects of a pyerrors error analysis.
    They are initialized with a list which contains arrays of samples for
    different ensembles/replica and another list of same length which contains
    the names of the ensembles/replica. Mathematical operations can be
    performed on instances. The result is another instance of Obs. The error of
    an instance can be computed with the gamma_method. Also contains additional
    methods for output and visualization of the error calculation.

    Attributes
    ----------
    S_global : float
        Standard value for S (default 2.0)
    S_dict : dict
        Dictionary for S values. If an entry for a given ensemble
        exists this overwrites the standard value for that ensemble.
    tau_exp_global : float
        Standard value for tau_exp (default 0.0)
    tau_exp_dict : dict
        Dictionary for tau_exp values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    N_sigma_global : float
        Standard value for N_sigma (default 1.0)
    N_sigma_dict : dict
        Dictionary for N_sigma values. If an entry for a given ensemble exists
        this overwrites the standard value for that ensemble.
    """
    __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue',
                 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma',
                 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint',
                 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint',
                 'idl', 'tag', '_covobs', '__dict__']

    S_global = 2.0
    S_dict = {}
    tau_exp_global = 0.0
    tau_exp_dict = {}
    N_sigma_global = 1.0
    N_sigma_dict = {}

    def __init__(self, samples, names, idl=None, **kwargs):
        """ Initialize Obs object.

        Parameters
        ----------
        samples : list
            list of numpy arrays containing the Monte Carlo samples
        names : list
            list of strings labeling the individual samples
        idl : list, optional
            list of ranges or lists on which the samples are defined
        """

        if kwargs.get("means") is None and len(samples):
            if len(samples) != len(names):
                raise ValueError('Length of samples and names incompatible.')
            if idl is not None:
                if len(idl) != len(names):
                    raise ValueError('Length of idl incompatible with samples and names.')
            name_length = len(names)
            if name_length > 1:
                if name_length != len(set(names)):
                    raise ValueError('Names are not unique.')
                if not all(isinstance(x, str) for x in names):
                    raise TypeError('All names have to be strings.')
                if len(set([o.split('|')[0] for o in names])) > 1:
                    raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.')
            else:
                if not isinstance(names[0], str):
                    raise TypeError('All names have to be strings.')
            if min(len(x) for x in samples) <= 4:
                raise ValueError('Samples have to have at least 5 entries.')

        self.names = sorted(names)
        self.shape = {}
        self.r_values = {}
        self.deltas = {}
        self._covobs = {}

        self._value = 0
        self.N = 0
        self.idl = {}
        if idl is not None:
            for name, idx in sorted(zip(names, idl)):
                if isinstance(idx, range):
                    self.idl[name] = idx
                elif isinstance(idx, (list, np.ndarray)):
                    dc = np.unique(np.diff(idx))
                    if np.any(dc < 0):
                        raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]])))
                    elif np.any(dc == 0):
                        raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]])))
                    if len(dc) == 1:
                        self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0])
                    else:
                        self.idl[name] = list(idx)
                else:
                    raise TypeError('incompatible type for idl[%s].' % (name))
        else:
            for name, sample in sorted(zip(names, samples)):
                self.idl[name] = range(1, len(sample) + 1)

        if kwargs.get("means") is not None:
            for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                self.r_values[name] = mean
                self.deltas[name] = sample
        else:
            for name, sample in sorted(zip(names, samples)):
                self.shape[name] = len(self.idl[name])
                self.N += self.shape[name]
                if len(sample) != self.shape[name]:
                    raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name]))
                self.r_values[name] = np.mean(sample)
                self.deltas[name] = sample - self.r_values[name]
                self._value += self.shape[name] * self.r_values[name]
            self._value /= self.N

        self._dvalue = 0.0
        self.ddvalue = 0.0
        self.reweighted = False

        self.tag = None
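    # Usage sketch (editor's example, not part of the library source):
    # constructing an Obs from synthetic samples. The ensemble name 'ensA'
    # and the random data are made up for illustration.
    #
    #     import numpy as np
    #     import pyerrors as pe
    #
    #     samples = np.random.default_rng(0).normal(1.0, 0.1, 1000)
    #     my_obs = pe.Obs([samples], ['ensA'])  # idl defaults to range(1, 1001)
    #     thinned = pe.Obs([samples], ['ensA'], idl=[range(1, 2000, 2)])  # every 2nd config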

    @property
    def value(self):
        return self._value

    @property
    def dvalue(self):
        return self._dvalue

    @property
    def e_names(self):
        return sorted(set([o.split('|')[0] for o in self.names]))

    @property
    def cov_names(self):
        return sorted(set([o for o in self.covobs.keys()]))

    @property
    def mc_names(self):
        return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names]))

    @property
    def e_content(self):
        res = {}
        for e, e_name in enumerate(self.e_names):
            res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names))
            if e_name in self.names:
                res[e_name].append(e_name)
        return res

    @property
    def covobs(self):
        return self._covobs

    def gamma_method(self, **kwargs):
        """Estimate the error and related properties of the Obs.

        Parameters
        ----------
        S : float
            specifies a custom value for the parameter S (default 2.0).
            If set to 0 it is assumed that the data exhibits no
            autocorrelation. In this case the error estimate coincides
            with the sample standard error.
        tau_exp : float
            positive value triggers the critical slowing down analysis
            (default 0.0).
        N_sigma : float
            number of standard deviations from zero until the tail is
            attached to the autocorrelation function (default 1).
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function (default True)
        """

        e_content = self.e_content
        self.e_dvalue = {}
        self.e_ddvalue = {}
        self.e_tauint = {}
        self.e_dtauint = {}
        self.e_windowsize = {}
        self.e_n_tauint = {}
        self.e_n_dtauint = {}
        e_gamma = {}
        self.e_rho = {}
        self.e_drho = {}
        self._dvalue = 0
        self.ddvalue = 0

        self.S = {}
        self.tau_exp = {}
        self.N_sigma = {}

        if kwargs.get('fft') is False:
            fft = False
        else:
            fft = True

        def _parse_kwarg(kwarg_name):
            if kwarg_name in kwargs:
                tmp = kwargs.get(kwarg_name)
                if isinstance(tmp, (int, float)):
                    if tmp < 0:
                        raise ValueError(kwarg_name + ' has to be larger or equal to 0.')
                    for e, e_name in enumerate(self.e_names):
                        getattr(self, kwarg_name)[e_name] = tmp
                else:
                    raise TypeError(kwarg_name + ' is not in proper format.')
            else:
                for e, e_name in enumerate(self.e_names):
                    if e_name in getattr(Obs, kwarg_name + '_dict'):
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name]
                    else:
                        getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global')

        _parse_kwarg('S')
        _parse_kwarg('tau_exp')
        _parse_kwarg('N_sigma')

        for e, e_name in enumerate(self.mc_names):
            gapsize = _determine_gap(self, e_content, e_name)

            r_length = []
            for r_name in e_content[e_name]:
                if isinstance(self.idl[r_name], range):
                    r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize)
                else:
                    r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize)

            e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]])
            w_max = max(r_length) // 2
            e_gamma[e_name] = np.zeros(w_max)
            self.e_rho[e_name] = np.zeros(w_max)
            self.e_drho[e_name] = np.zeros(w_max)

            for r_name in e_content[e_name]:
                e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)

            gamma_div = np.zeros(w_max)
            for r_name in e_content[e_name]:
                gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize)
            gamma_div[gamma_div < 1] = 1.0
            e_gamma[e_name] /= gamma_div[:w_max]

            if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny:  # Prevent division by zero
                self.e_tauint[e_name] = 0.5
                self.e_dtauint[e_name] = 0.0
                self.e_dvalue[e_name] = 0.0
                self.e_ddvalue[e_name] = 0.0
                self.e_windowsize[e_name] = 0
                continue

            self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0]
            self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:])))
            # Make sure no entry of tauint is smaller than 0.5
            self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps
            # hep-lat/0306017 eq. (42)
            self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N)
            self.e_n_dtauint[e_name][0] = 0.0

            def _compute_drho(i):
                tmp = (self.e_rho[e_name][i + 1:w_max]
                       + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1],
                                         self.e_rho[e_name][1:max(1, w_max - 2 * i)]])
                       - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i])
                self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N)

            if self.tau_exp[e_name] > 0:
                _compute_drho(1)
                texp = self.tau_exp[e_name]
                # Critical slowing down analysis
                if w_max // 2 <= 1:
                    raise ValueError("Need at least 8 samples for tau_exp error analysis")
                for n in range(1, w_max // 2):
                    _compute_drho(n + 1)
                    if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2:
                        # Bias correction hep-lat/0306017 eq. (49) included
                        self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1])  # The absolute value makes sure that the tail contribution is always positive
                        self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2)
                        # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2
                        self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                        self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                        self.e_windowsize[e_name] = n
                        break
            else:
                if self.S[e_name] == 0.0:
                    self.e_tauint[e_name] = 0.5
                    self.e_dtauint[e_name] = 0.0
                    self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1))
                    self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N)
                    self.e_windowsize[e_name] = 0
                else:
                    # Standard automatic windowing procedure
                    tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1))
                    g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N)
                    for n in range(1, w_max):
                        if g_w[n - 1] < 0 or n >= w_max - 1:
                            _compute_drho(n)
                            self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N)  # Bias correction hep-lat/0306017 eq. (49)
                            self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n]
                            self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N)
                            self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N)
                            self.e_windowsize[e_name] = n
                            break

            self._dvalue += self.e_dvalue[e_name] ** 2
            self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2

        for e_name in self.cov_names:
            self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq())
            self.e_ddvalue[e_name] = 0
            self._dvalue += self.e_dvalue[e_name] ** 2

        self._dvalue = np.sqrt(self._dvalue)
        if self._dvalue == 0.0:
            self.ddvalue = 0.0
        else:
            self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue
        return
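    # Usage sketch (editor's example, not part of the source): error
    # estimation with custom parameters. The Obs 'my_obs' and the ensemble
    # name 'ensA' are placeholders.
    #
    #     my_obs.gamma_method()               # automatic windowing, S=2.0
    #     my_obs.gamma_method(S=3.0)          # larger summation window
    #     my_obs.gamma_method(tau_exp=10.0)   # attach an exponential tail
    #     print(my_obs.value, my_obs.dvalue)  # mean and its error
    #     Obs.S_dict['ensA'] = 2.5            # ensemble-specific default for S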

    gm = gamma_method

    def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize):
        """Calculate Gamma_{AA} from the deltas, which are defined on idx.
           idx is assumed to be a contiguous range (possibly with a stepsize != 1)

        Parameters
        ----------
        deltas : list
            List of fluctuations
        idx : list
            List or range of configurations on which the deltas are defined.
        shape : int
            Number of configurations in idx.
        w_max : int
            Upper bound for the summation window.
        fft : bool
            determines whether the fft algorithm is used for the computation
            of the autocorrelation function.
        gapsize : int
            The target distance between two configurations. If longer distances
            are found in idx, the data is expanded.
        """
        gamma = np.zeros(w_max)
        deltas = _expand_deltas(deltas, idx, shape, gapsize)
        new_shape = len(deltas)
        if fft:
            max_gamma = min(new_shape, w_max)
            # The padding for the fft has to be even
            padding = new_shape + max_gamma + (new_shape + max_gamma) % 2
            gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma]
        else:
            for n in range(w_max):
                if new_shape - n >= 0:
                    gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape])

        return gamma
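    # Illustrative sketch (editor's example) of the FFT trick used above:
    # with sufficient zero-padding, the padded |FFT|^2 reproduces the direct
    # sum sum_i d_i d_{i+W}. All names below are made up.
    #
    #     import numpy as np
    #     d = np.random.default_rng(1).normal(size=100)
    #     w_max = 50
    #     padding = len(d) + w_max + (len(d) + w_max) % 2
    #     via_fft = np.fft.irfft(np.abs(np.fft.rfft(d, padding)) ** 2)[:w_max]
    #     direct = np.array([d[:len(d) - n] @ d[n:] for n in range(w_max)])
    #     assert np.allclose(via_fft, direct)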

    def details(self, ens_content=True):
        """Output detailed properties of the Obs.

        Parameters
        ----------
        ens_content : bool
            print details about the ensembles and replica if true.
        """
        if self.tag is not None:
            print("Description:", self.tag)
        if not hasattr(self, 'e_dvalue'):
            print('Result\t %3.8e' % (self.value))
        else:
            if self.value == 0.0:
                percentage = np.nan
            else:
                percentage = np.abs(self._dvalue / self.value) * 100
            print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage))
            if len(self.e_names) > 1:
                print(' Ensemble errors:')
            e_content = self.e_content
            for e_name in self.mc_names:
                gap = _determine_gap(self, e_content, e_name)

                if len(self.e_names) > 1:
                    print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name]))
                tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name])
                tau_string += f" in units of {gap} config"
                if gap > 1:
                    tau_string += "s"
                if self.tau_exp[e_name] > 0:
                    tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name])
                else:
                    tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name])
                print(tau_string)
            for e_name in self.cov_names:
                print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name]))
        if ens_content is True:
            if len(self.e_names) == 1:
                print(self.N, 'samples in', len(self.e_names), 'ensemble:')
            else:
                print(self.N, 'samples in', len(self.e_names), 'ensembles:')
            my_string_list = []
            for key, value in sorted(self.e_content.items()):
                if key not in self.covobs:
                    my_string = '  ' + "\u00B7 Ensemble '" + key + "' "
                    if len(value) == 1:
                        my_string += f': {self.shape[value[0]]} configurations'
                        if isinstance(self.idl[value[0]], range):
                            my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')'
                        else:
                            my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})'
                    else:
                        sublist = []
                        for v in value:
                            my_substring = '    ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' "
                            my_substring += f': {self.shape[v]} configurations'
                            if isinstance(self.idl[v], range):
                                my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')'
                            else:
                                my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})'
                            sublist.append(my_substring)

                        my_string += '\n' + '\n'.join(sublist)
                else:
                    my_string = '  ' + "\u00B7 Covobs '" + key + "' "
                my_string_list.append(my_string)
            print('\n'.join(my_string_list))

    def reweight(self, weight):
        """Reweight the obs with a given reweighting factor.

        Parameters
        ----------
        weight : Obs
            Reweighting factor. An Observable that has to be defined on a superset of the
            configurations in obs[i].idl for all i.
        """
        return reweight(weight, [self])[0]

    def is_zero_within_error(self, sigma=1):
        """Checks whether the observable is zero within 'sigma' standard errors.

        Parameters
        ----------
        sigma : int
            Number of standard errors used for the check.

        Only works properly when the gamma method has been run.
        """
        return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue

    def is_zero(self, atol=1e-10):
        """Checks whether the observable is zero within a given tolerance.

        Parameters
        ----------
        atol : float
            Absolute tolerance (for details see numpy documentation).
        """
        return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
495 """ 496 if not hasattr(self, 'e_dvalue'): 497 raise Exception('Run the gamma method first.') 498 499 for e, e_name in enumerate(self.mc_names): 500 fig = plt.figure() 501 plt.xlabel(r'$W$') 502 plt.ylabel(r'$\tau_\mathrm{int}$') 503 length = int(len(self.e_n_tauint[e_name])) 504 if self.tau_exp[e_name] > 0: 505 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 506 x_help = np.arange(2 * self.tau_exp[e_name]) 507 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 508 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 509 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 510 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 511 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 512 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 513 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 514 else: 515 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 516 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 517 518 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 519 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 520 plt.legend() 521 plt.xlim(-0.5, xmax) 522 ylim = plt.ylim() 523 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 524 plt.draw() 525 if save: 526 fig.savefig(save + "_" + str(e)) 527 528 def plot_rho(self, save=None): 529 """Plot normalized autocorrelation function time for each ensemble. 530 531 Parameters 532 ---------- 533 save : str 534 saves the figure to a file named 'save' if. 
535 """ 536 if not hasattr(self, 'e_dvalue'): 537 raise Exception('Run the gamma method first.') 538 for e, e_name in enumerate(self.mc_names): 539 fig = plt.figure() 540 plt.xlabel('W') 541 plt.ylabel('rho') 542 length = int(len(self.e_drho[e_name])) 543 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 544 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 545 if self.tau_exp[e_name] > 0: 546 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 547 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 548 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 549 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 550 else: 551 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 552 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 553 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 554 plt.xlim(-0.5, xmax) 555 plt.draw() 556 if save: 557 fig.savefig(save + "_" + str(e)) 558 559 def plot_rep_dist(self): 560 """Plot replica distribution for each ensemble with more than one replicum.""" 561 if not hasattr(self, 'e_dvalue'): 562 raise Exception('Run the gamma method first.') 563 for e, e_name in enumerate(self.mc_names): 564 if len(self.e_content[e_name]) == 1: 565 print('No replica distribution for a single replicum (', e_name, ')') 566 continue 567 r_length = [] 568 sub_r_mean = 0 569 for r, r_name in enumerate(self.e_content[e_name]): 570 r_length.append(len(self.deltas[r_name])) 571 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 572 e_N = np.sum(r_length) 573 sub_r_mean /= e_N 574 arr = np.zeros(len(self.e_content[e_name])) 575 for r, r_name in enumerate(self.e_content[e_name]): 576 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 577 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 578 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 579 plt.draw() 580 581 def plot_history(self, expand=True): 582 """Plot derived Monte Carlo history for each ensemble 583 584 Parameters 585 ---------- 586 expand : bool 587 show expanded history for irregular Monte Carlo chains (default: True). 588 """ 589 for e, e_name in enumerate(self.mc_names): 590 plt.figure() 591 r_length = [] 592 tmp = [] 593 tmp_expanded = [] 594 for r, r_name in enumerate(self.e_content[e_name]): 595 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 596 if expand: 597 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 598 r_length.append(len(tmp_expanded[-1])) 599 else: 600 r_length.append(len(tmp[-1])) 601 e_N = np.sum(r_length) 602 x = np.arange(e_N) 603 y_test = np.concatenate(tmp, axis=0) 604 if expand: 605 y = np.concatenate(tmp_expanded, axis=0) 606 else: 607 y = y_test 608 plt.errorbar(x, y, fmt='.', markersize=3) 609 plt.xlim(-0.5, e_N - 0.5) 610 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 611 plt.draw() 612 613 def plot_piechart(self, save=None): 614 """Plot piechart which shows the fractional contribution of each 615 ensemble to the error and returns a dictionary containing the fractions. 616 617 Parameters 618 ---------- 619 save : str 620 saves the figure to a file named 'save' if. 
621 """ 622 if not hasattr(self, 'e_dvalue'): 623 raise Exception('Run the gamma method first.') 624 if np.isclose(0.0, self._dvalue, atol=1e-15): 625 raise ValueError('Error is 0.0') 626 labels = self.e_names 627 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 628 fig1, ax1 = plt.subplots() 629 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 630 ax1.axis('equal') 631 plt.draw() 632 if save: 633 fig1.savefig(save) 634 635 return dict(zip(labels, sizes)) 636 637 def dump(self, filename, datatype="json.gz", description="", **kwargs): 638 """Dump the Obs to a file 'name' of chosen format. 639 640 Parameters 641 ---------- 642 filename : str 643 name of the file to be saved. 644 datatype : str 645 Format of the exported file. Supported formats include 646 "json.gz" and "pickle" 647 description : str 648 Description for output file, only relevant for json.gz format. 649 path : str 650 specifies a custom path for the file (default '.') 651 """ 652 if 'path' in kwargs: 653 file_name = kwargs.get('path') + '/' + filename 654 else: 655 file_name = filename 656 657 if datatype == "json.gz": 658 from .input.json import dump_to_json 659 dump_to_json([self], file_name, description=description) 660 elif datatype == "pickle": 661 with open(file_name + '.p', 'wb') as fb: 662 pickle.dump(self, fb) 663 else: 664 raise TypeError("Unknown datatype " + str(datatype)) 665 666 def export_jackknife(self): 667 """Export jackknife samples from the Obs 668 669 Returns 670 ------- 671 numpy.ndarray 672 Returns a numpy array of length N + 1 where N is the number of samples 673 for the given ensemble and replicum. The zeroth entry of the array contains 674 the mean value of the Obs, entries 1 to N contain the N jackknife samples 675 derived from the Obs. The current implementation only works for observables 676 defined on exactly one ensemble and replicum. The derived jackknife samples 677 should agree with samples from a full jackknife analysis up to O(1/N). 678 """ 679 680 if len(self.names) != 1: 681 raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 682 683 name = self.names[0] 684 full_data = self.deltas[name] + self.r_values[name] 685 n = full_data.size 686 mean = self.value 687 tmp_jacks = np.zeros(n + 1) 688 tmp_jacks[0] = mean 689 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 690 return tmp_jacks 691 692 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 693 """Export bootstrap samples from the Obs 694 695 Parameters 696 ---------- 697 samples : int 698 Number of bootstrap samples to generate. 699 random_numbers : np.ndarray 700 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 701 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 702 save_rng : str 703 Save the random numbers to a file if a path is specified. 704 705 Returns 706 ------- 707 numpy.ndarray 708 Returns a numpy array of length N + 1 where N is the number of samples 709 for the given ensemble and replicum. The zeroth entry of the array contains 710 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 711 derived from the Obs. The current implementation only works for observables 712 defined on exactly one ensemble and replicum. The derived bootstrap samples 713 should agree with samples from a full bootstrap analysis up to O(1/N). 
714 """ 715 if len(self.names) != 1: 716 raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 717 718 name = self.names[0] 719 length = self.N 720 721 if random_numbers is None: 722 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 723 rng = np.random.default_rng(seed) 724 random_numbers = rng.integers(0, length, size=(samples, length)) 725 726 if save_rng is not None: 727 np.savetxt(save_rng, random_numbers, fmt='%i') 728 729 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 730 ret = np.zeros(samples + 1) 731 ret[0] = self.value 732 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 733 return ret 734 735 def __float__(self): 736 return float(self.value) 737 738 def __repr__(self): 739 return 'Obs[' + str(self) + ']' 740 741 def __str__(self): 742 return _format_uncertainty(self.value, self._dvalue) 743 744 def __format__(self, format_type): 745 if format_type == "": 746 significance = 2 747 else: 748 significance = int(float(format_type.replace("+", "").replace("-", ""))) 749 my_str = _format_uncertainty(self.value, self._dvalue, 750 significance=significance) 751 for char in ["+", " "]: 752 if format_type.startswith(char): 753 if my_str[0] != "-": 754 my_str = char + my_str 755 return my_str 756 757 def __hash__(self): 758 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 759 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 760 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 761 hash_tuple += tuple([o.encode() for o in self.names]) 762 m = hashlib.md5() 763 [m.update(o) for o in hash_tuple] 764 return int(m.hexdigest(), 16) & 0xFFFFFFFF 765 766 # Overload comparisons 767 def __lt__(self, other): 768 return self.value < other 769 770 def __le__(self, other): 771 return self.value <= other 772 773 def __gt__(self, other): 774 return self.value > other 775 776 def __ge__(self, other): 777 return self.value >= other 778 779 def __eq__(self, other): 780 if other is None: 781 return False 782 return (self - other).is_zero() 783 784 # Overload math operations 785 def __add__(self, y): 786 if isinstance(y, Obs): 787 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 788 else: 789 if isinstance(y, np.ndarray): 790 return np.array([self + o for o in y]) 791 elif isinstance(y, complex): 792 return CObs(self, 0) + y 793 elif y.__class__.__name__ in ['Corr', 'CObs']: 794 return NotImplemented 795 else: 796 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 797 798 def __radd__(self, y): 799 return self + y 800 801 def __mul__(self, y): 802 if isinstance(y, Obs): 803 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 804 else: 805 if isinstance(y, np.ndarray): 806 return np.array([self * o for o in y]) 807 elif isinstance(y, complex): 808 return CObs(self * y.real, self * y.imag) 809 elif y.__class__.__name__ in ['Corr', 'CObs']: 810 return NotImplemented 811 else: 812 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 813 814 def __rmul__(self, y): 815 return self * y 816 817 def __sub__(self, y): 818 if isinstance(y, Obs): 819 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 820 else: 821 if isinstance(y, np.ndarray): 822 return np.array([self - o for o in y]) 823 elif 

    def __float__(self):
        return float(self.value)

    def __repr__(self):
        return 'Obs[' + str(self) + ']'

    def __str__(self):
        return _format_uncertainty(self.value, self._dvalue)

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        my_str = _format_uncertainty(self.value, self._dvalue,
                                     significance=significance)
        for char in ["+", " "]:
            if format_type.startswith(char):
                if my_str[0] != "-":
                    my_str = char + my_str
        return my_str

    def __hash__(self):
        hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),)
        hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()])
        hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()])
        hash_tuple += tuple([o.encode() for o in self.names])
        m = hashlib.md5()
        [m.update(o) for o in hash_tuple]
        return int(m.hexdigest(), 16) & 0xFFFFFFFF

    # Overload comparisons
    def __lt__(self, other):
        return self.value < other

    def __le__(self, other):
        return self.value <= other

    def __gt__(self, other):
        return self.value > other

    def __ge__(self, other):
        return self.value >= other

    def __eq__(self, other):
        if other is None:
            return False
        return (self - other).is_zero()

    # Overload math operations
    def __add__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self + o for o in y])
            elif isinstance(y, complex):
                return CObs(self, 0) + y
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1])

    def __radd__(self, y):
        return self + y

    def __mul__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self * o for o in y])
            elif isinstance(y, complex):
                return CObs(self * y.real, self * y.imag)
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y])

    def __rmul__(self, y):
        return self * y

    def __sub__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self - o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1])

    def __rsub__(self, y):
        return -1 * (self - y)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __truediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([self / o for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y])

    def __rtruediv__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2])
        else:
            if isinstance(y, np.ndarray):
                return np.array([o / self for o in y])
            elif y.__class__.__name__ in ['Corr', 'CObs']:
                return NotImplemented
            else:
                return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2])

    def __pow__(self, y):
        if isinstance(y, Obs):
            return derived_observable(lambda x, **kwargs: x[0] ** x[1], [self, y], man_grad=[y.value * self.value ** (y.value - 1), self.value ** y.value * np.log(self.value)])
        else:
            return derived_observable(lambda x, **kwargs: x[0] ** y, [self], man_grad=[y * self.value ** (y - 1)])

    def __rpow__(self, y):
        return derived_observable(lambda x, **kwargs: y ** x[0], [self], man_grad=[y ** self.value * np.log(y)])

    def __abs__(self):
        return derived_observable(lambda x: anp.abs(x[0]), [self])

    # Overload numpy functions
    def sqrt(self):
        return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)])

    def log(self):
        return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value])

    def exp(self):
        return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)])

    def sin(self):
        return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)])

    def cos(self):
        return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)])

    def tan(self):
        return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2])

    def arcsin(self):
        return derived_observable(lambda x: anp.arcsin(x[0]), [self])

    def arccos(self):
        return derived_observable(lambda x: anp.arccos(x[0]), [self])

    def arctan(self):
        return derived_observable(lambda x: anp.arctan(x[0]), [self])

    def sinh(self):
        return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)])

    def cosh(self):
        return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)])

    def tanh(self):
        return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2])

    def arcsinh(self):
        return derived_observable(lambda x: anp.arcsinh(x[0]), [self])

    def arccosh(self):
        return derived_observable(lambda x: anp.arccosh(x[0]), [self])

    def arctanh(self):
        return derived_observable(lambda x: anp.arctanh(x[0]), [self])
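

# Usage sketch (editor's example, not part of the source): error propagation
# through the overloaded operators; 'obs1' and 'obs2' are placeholder Obs
# defined on the same ensemble.
#
#     ratio = obs1 / obs2          # handled by derived_observable
#     combo = np.sin(obs1) + obs2 ** 2
#     combo.gamma_method()         # errors only exist after the gamma method
#     print(combo)                 # e.g. 1.234(56) via _format_uncertainty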


class CObs:
    """Class for a complex valued observable."""
    __slots__ = ['_real', '_imag', 'tag']

    def __init__(self, real, imag=0.0):
        self._real = real
        self._imag = imag
        self.tag = None

    @property
    def real(self):
        return self._real

    @property
    def imag(self):
        return self._imag

    def gamma_method(self, **kwargs):
        """Executes the gamma_method for the real and the imaginary part."""
        if isinstance(self.real, Obs):
            self.real.gamma_method(**kwargs)
        if isinstance(self.imag, Obs):
            self.imag.gamma_method(**kwargs)

    def is_zero(self):
        """Checks whether both real and imaginary part are zero within machine precision."""
        return self.real == 0.0 and self.imag == 0.0

    def conjugate(self):
        return CObs(self.real, -self.imag)

    def __add__(self, other):
        if isinstance(other, np.ndarray):
            return other + self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real + other.real,
                        self.imag + other.imag)
        else:
            return CObs(self.real + other, self.imag)

    def __radd__(self, y):
        return self + y

    def __sub__(self, other):
        if isinstance(other, np.ndarray):
            return -1 * (other - self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs(self.real - other.real, self.imag - other.imag)
        else:
            return CObs(self.real - other, self.imag)

    def __rsub__(self, other):
        return -1 * (self - other)

    def __mul__(self, other):
        if isinstance(other, np.ndarray):
            return other * self
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]):
                return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]),
                            derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3],
                                               [self.real, other.real, self.imag, other.imag],
                                               man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value]))
            elif getattr(other, 'imag', 0) != 0:
                return CObs(self.real * other.real - self.imag * other.imag,
                            self.imag * other.real + self.real * other.imag)
            else:
                return CObs(self.real * other.real, self.imag * other.real)
        else:
            return CObs(self.real * other, self.imag * other)

    def __rmul__(self, other):
        return self * other

    def __truediv__(self, other):
        if isinstance(other, np.ndarray):
            return 1 / (other / self)
        elif hasattr(other, 'real') and hasattr(other, 'imag'):
            r = other.real ** 2 + other.imag ** 2
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r)
        else:
            return CObs(self.real / other, self.imag / other)

    def __rtruediv__(self, other):
        r = self.real ** 2 + self.imag ** 2
        if hasattr(other, 'real') and hasattr(other, 'imag'):
            return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r)
        else:
            return CObs(self.real * other / r, -self.imag * other / r)

    def __abs__(self):
        return np.sqrt(self.real ** 2 + self.imag ** 2)

    def __pos__(self):
        return self

    def __neg__(self):
        return -1 * self

    def __eq__(self, other):
        return self.real == other.real and self.imag == other.imag

    def __str__(self):
        return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)'

    def __repr__(self):
        return 'CObs[' + str(self) + ']'

    def __format__(self, format_type):
        if format_type == "":
            significance = 2
            format_type = "2"
        else:
            significance = int(float(format_type.replace("+", "").replace("-", "")))
        return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
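

# Usage sketch (editor's example): wrapping two Obs into a complex valued
# observable. 'obs_re' and 'obs_im' are placeholder Obs on the same ensemble.
#
#     c1 = CObs(obs_re, obs_im)
#     c2 = c1 * c1.conjugate()     # |c1|^2 as a CObs
#     c2.gamma_method()
#     print(c2.real, c2.imag)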


def gamma_method(x, **kwargs):
    """Vectorized version of the gamma_method applicable to lists or arrays of Obs.

    See docstring of pe.Obs.gamma_method for details.
    """
    return np.vectorize(lambda o: o.gm(**kwargs))(x)


gm = gamma_method


def _format_uncertainty(value, dvalue, significance=2):
    """Creates a string of a value and its error in parenthesis notation, e.g., 13.02(45)"""
    if dvalue == 0.0 or (not np.isfinite(dvalue)):
        return str(value)
    if not isinstance(significance, int):
        raise TypeError("significance needs to be an integer.")
    if significance < 1:
        raise ValueError("significance needs to be larger than zero.")
    fexp = np.floor(np.log10(dvalue))
    if fexp < 0.0:
        return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f')
    elif fexp == 0.0:
        return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})"
    else:
        return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})"


def _expand_deltas(deltas, idx, shape, gapsize):
    """Expand deltas defined on idx to a regular range with spacing gapsize between two
    configurations and where holes are filled by 0.
    If idx is of type range, the deltas are not changed if idx.step == gapsize.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx : list
        List or range of configs on which the deltas are defined, has to be sorted in ascending order.
    shape : int
        Number of configs in idx.
    gapsize : int
        The target distance between two configurations. If longer distances
        are found in idx, the data is expanded.
    """
    if isinstance(idx, range):
        if (idx.step == gapsize):
            return deltas
    ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize)
    for i in range(shape):
        ret[(idx[i] - idx[0]) // gapsize] = deltas[i]
    return ret


def _merge_idx(idl):
    """Returns the union of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    if _check_lists_equal(idl):
        return idl[0]

    idunion = sorted(set().union(*idl))

    # Check whether idunion can be expressed as range
    idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0])
    idtest = [list(idrange), idunion]
    if _check_lists_equal(idtest):
        return idrange

    return idunion


def _intersection_idx(idl):
    """Returns the intersection of all lists in idl as range or sorted list

    Parameters
    ----------
    idl : list
        List of lists or ranges.
    """

    if _check_lists_equal(idl):
        return idl[0]

    idinter = sorted(set.intersection(*[set(o) for o in idl]))

    # Check whether idinter can be expressed as range
    try:
        idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0])
        idtest = [list(idrange), idinter]
        if _check_lists_equal(idtest):
            return idrange
    except IndexError:
        pass

    return idinter
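

# Behaviour sketch of the idl helpers (editor's example with hypothetical
# inputs): overlapping ranges are merged or intersected and re-expressed as
# ranges whenever possible.
#
#     _merge_idx([range(1, 11), range(5, 15)])         # -> range(1, 15)
#     _intersection_idx([range(1, 11), range(5, 15)])  # -> range(5, 11)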
1121 """ 1122 1123 if _check_lists_equal(idl): 1124 return idl[0] 1125 1126 idinter = sorted(set.intersection(*[set(o) for o in idl])) 1127 1128 # Check whether idinter can be expressed as range 1129 try: 1130 idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0]) 1131 idtest = [list(idrange), idinter] 1132 if _check_lists_equal(idtest): 1133 return idrange 1134 except IndexError: 1135 pass 1136 1137 return idinter 1138 1139 1140def _expand_deltas_for_merge(deltas, idx, shape, new_idx, scalefactor): 1141 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1142 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1143 common divisor of the step sizes is used as new step size. 1144 1145 Parameters 1146 ---------- 1147 deltas : list 1148 List of fluctuations 1149 idx : list 1150 List or range of configs on which the deltas are defined. 1151 Has to be a subset of new_idx and has to be sorted in ascending order. 1152 shape : list 1153 Number of configs in idx. 1154 new_idx : list 1155 List of configs that defines the new range, has to be sorted in ascending order. 1156 scalefactor : float 1157 An additional scaling factor that can be applied to scale the fluctuations, 1158 e.g., when Obs with differing numbers of replica are merged. 1159 """ 1160 if type(idx) is range and type(new_idx) is range: 1161 if idx == new_idx: 1162 if scalefactor == 1: 1163 return deltas 1164 else: 1165 return deltas * scalefactor 1166 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1167 for i in range(shape): 1168 ret[idx[i] - new_idx[0]] = deltas[i] 1169 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) * scalefactor 1170 1171 1172def derived_observable(func, data, array_mode=False, **kwargs): 1173 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1174 1175 Parameters 1176 ---------- 1177 func : object 1178 arbitrary function of the form func(data, **kwargs). For the 1179 automatic differentiation to work, all numpy functions have to have 1180 the autograd wrapper (use 'import autograd.numpy as anp'). 1181 data : list 1182 list of Obs, e.g. [obs1, obs2, obs3]. 1183 num_grad : bool 1184 if True, numerical derivatives are used instead of autograd 1185 (default False). To control the numerical differentiation the 1186 kwargs of numdifftools.step_generators.MaxStepGenerator 1187 can be used. 1188 man_grad : list 1189 manually supply a list or an array which contains the jacobian 1190 of func. Use cautiously, supplying the wrong derivative will 1191 not be intercepted. 1192 1193 Notes 1194 ----- 1195 For simple mathematical operations it can be practical to use anonymous 1196 functions. For the ratio of two observables one can e.g. use 1197 1198 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1199 """ 1200 1201 data = np.asarray(data) 1202 raveled_data = data.ravel() 1203 1204 # Workaround for matrix operations containing non Obs data 1205 if not all(isinstance(x, Obs) for x in raveled_data): 1206 for i in range(len(raveled_data)): 1207 if isinstance(raveled_data[i], (int, float)): 1208 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1209 1210 allcov = {} 1211 for o in raveled_data: 1212 for name in o.cov_names: 1213 if name in allcov: 1214 if not np.allclose(allcov[name], o.covobs[name].cov): 1215 raise Exception('Inconsistent covariance matrices for %s!' 


def derived_observable(func, data, array_mode=False, **kwargs):
    """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.

    Parameters
    ----------
    func : object
        arbitrary function of the form func(data, **kwargs). For the
        automatic differentiation to work, all numpy functions have to have
        the autograd wrapper (use 'import autograd.numpy as anp').
    data : list
        list of Obs, e.g. [obs1, obs2, obs3].
    num_grad : bool
        if True, numerical derivatives are used instead of autograd
        (default False). To control the numerical differentiation the
        kwargs of numdifftools.step_generators.MaxStepGenerator
        can be used.
    man_grad : list
        manually supply a list or an array which contains the jacobian
        of func. Use cautiously, supplying the wrong derivative will
        not be intercepted.

    Notes
    -----
    For simple mathematical operations it can be practical to use anonymous
    functions. For the ratio of two observables one can e.g. use

    new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
    """

    data = np.asarray(data)
    raveled_data = data.ravel()

    # Workaround for matrix operations containing non Obs data
    if not all(isinstance(x, Obs) for x in raveled_data):
        for i in range(len(raveled_data)):
            if isinstance(raveled_data[i], (int, float)):
                raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###")

    allcov = {}
    for o in raveled_data:
        for name in o.cov_names:
            if name in allcov:
                if not np.allclose(allcov[name], o.covobs[name].cov):
                    raise Exception('Inconsistent covariance matrices for %s!' % (name))
            else:
                allcov[name] = o.covobs[name].cov

    n_obs = len(raveled_data)
    new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x]))
    new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x]))
    new_sample_names = sorted(set(new_names) - set(new_cov_names))

    reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0

    if data.ndim == 1:
        values = np.array([o.value for o in data])
    else:
        values = np.vectorize(lambda x: x.value)(data)

    new_values = func(values, **kwargs)

    multi = int(isinstance(new_values, np.ndarray))

    new_r_values = {}
    new_idl_d = {}
    for name in new_sample_names:
        idl = []
        tmp_values = np.zeros(n_obs)
        for i, item in enumerate(raveled_data):
            tmp_values[i] = item.r_values.get(name, item.value)
            tmp_idl = item.idl.get(name)
            if tmp_idl is not None:
                idl.append(tmp_idl)
        if multi > 0:
            tmp_values = np.array(tmp_values).reshape(data.shape)
        new_r_values[name] = func(tmp_values, **kwargs)
        new_idl_d[name] = _merge_idx(idl)

    def _compute_scalefactor_missing_rep(obs):
        """
        Computes the scale factor that is to be multiplied with the deltas
        in the case where Obs with different subsets of replica are merged.
        Returns a dictionary with the scale factor for each Monte Carlo name.

        Parameters
        ----------
        obs : Obs
            The observable corresponding to the deltas that are to be scaled
        """
        scalef_d = {}
        for mc_name in obs.mc_names:
            mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')]
            new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')]
            if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d):
                scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d])
        return scalef_d

    if 'man_grad' in kwargs:
        deriv = np.asarray(kwargs.get('man_grad'))
        if new_values.shape + data.shape != deriv.shape:
            raise ValueError('Manual derivative does not have correct shape.')
    elif kwargs.get('num_grad') is True:
        if multi > 0:
            raise Exception('Multi mode currently not supported for numerical derivative')
        options = {
            'base_step': 0.1,
            'step_ratio': 2.5}
        for key in options.keys():
            kwarg = kwargs.get(key)
            if kwarg is not None:
                options[key] = kwarg
        tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs)
        if tmp_df.size == 1:
            deriv = np.array([tmp_df.real])
        else:
            deriv = tmp_df.real
    else:
        deriv = jacobian(func)(values, **kwargs)

    final_result = np.zeros(new_values.shape, dtype=object)

    if array_mode is True:

        class _Zero_grad():
            def __init__(self, N):
                self.grad = np.zeros((N, 1))

        new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x]))
        d_extracted = {}
        g_extracted = {}
        for name in new_sample_names:
            d_extracted[name] = []
            ens_length = len(new_idl_d[name])
            for i_dat, dat in enumerate(data):
                d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, )))
        for name in new_cov_names:
            g_extracted[name] = []
            zero_grad = _Zero_grad(new_covobs_lengths[name])
            for i_dat, dat in enumerate(data):
                g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1)))

    for i_val, new_val in np.ndenumerate(new_values):
        new_deltas = {}
        new_grad = {}
        if array_mode is True:
            for name in new_sample_names:
                ens_length = d_extracted[name][0].shape[-1]
                new_deltas[name] = np.zeros(ens_length)
                for i_dat, dat in enumerate(d_extracted[name]):
                    new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
            for name in new_cov_names:
                new_grad[name] = 0
                for i_dat, dat in enumerate(g_extracted[name]):
                    new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat)
        else:
            for j_obs, obs in np.ndenumerate(data):
                scalef_d = _compute_scalefactor_missing_rep(obs)
                for name in obs.names:
                    if name in obs.cov_names:
                        new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad
                    else:
                        new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1))

        new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad}

        if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()):
            raise ValueError('The same name has been used for deltas and covobs!')
        new_samples = []
        new_means = []
        new_idl = []
        new_names_obs = []
        for name in new_names:
            if name not in new_covobs:
                new_samples.append(new_deltas[name])
                new_idl.append(new_idl_d[name])
                new_means.append(new_r_values[name][i_val])
                new_names_obs.append(name)
        final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl)
        for name in new_covobs:
            final_result[i_val].names.append(name)
        final_result[i_val]._covobs = new_covobs
        final_result[i_val]._value = new_val
        final_result[i_val].reweighted = reweighted

    if multi == 0:
        final_result = final_result.item()

    return final_result
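

# Usage sketch (editor's example): the same derived quantity computed with
# autograd and with numerical differentiation; 'obs1' and 'obs2' are
# placeholder Obs.
#
#     import autograd.numpy as anp
#     o_auto = derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2])
#     o_num = derived_observable(lambda x: anp.log(x[0] / x[1]), [obs1, obs2],
#                                num_grad=True, base_step=0.05)
#     o_auto.gamma_method(); o_num.gamma_method()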


def _reduce_deltas(deltas, idx_old, idx_new):
    """Extract deltas defined on idx_old on all configs of idx_new.

    Assumes that idx_old and idx_new are correctly defined idl, i.e., they
    are ordered in an ascending order.

    Parameters
    ----------
    deltas : list
        List of fluctuations
    idx_old : list
        List or range of configs on which the deltas are defined
    idx_new : list
        List of configs for which we want to extract the deltas.
        Has to be a subset of idx_old.
    """
    if not len(deltas) == len(idx_old):
        raise ValueError('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old)))
    if type(idx_old) is range and type(idx_new) is range:
        if idx_old == idx_new:
            return deltas
    if _check_lists_equal([idx_old, idx_new]):
        return deltas
    indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1]
    if len(indices) < len(idx_new):
        raise ValueError('Error in _reduce_deltas: Config of idx_new not in idx_old')
    return np.array(deltas)[indices]
1377 """ 1378 if not len(deltas) == len(idx_old): 1379 raise ValueError('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1380 if type(idx_old) is range and type(idx_new) is range: 1381 if idx_old == idx_new: 1382 return deltas 1383 if _check_lists_equal([idx_old, idx_new]): 1384 return deltas 1385 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1386 if len(indices) < len(idx_new): 1387 raise ValueError('Error in _reduce_deltas: Config of idx_new not in idx_old') 1388 return np.array(deltas)[indices] 1389 1390 1391def reweight(weight, obs, **kwargs): 1392 """Reweight a list of observables. 1393 1394 Parameters 1395 ---------- 1396 weight : Obs 1397 Reweighting factor. An Observable that has to be defined on a superset of the 1398 configurations in obs[i].idl for all i. 1399 obs : list 1400 list of Obs, e.g. [obs1, obs2, obs3]. 1401 all_configs : bool 1402 if True, the reweighted observables are normalized by the average of 1403 the reweighting factor on all configurations in weight.idl and not 1404 on the configurations in obs[i].idl. Default False. 1405 """ 1406 result = [] 1407 for i in range(len(obs)): 1408 if len(obs[i].cov_names): 1409 raise ValueError('Error: Not possible to reweight an Obs that contains covobs!') 1410 if not set(obs[i].names).issubset(weight.names): 1411 raise ValueError('Error: Ensembles do not fit') 1412 if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1: 1413 raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.') 1414 for name in obs[i].names: 1415 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1416 raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1417 new_samples = [] 1418 w_deltas = {} 1419 for name in sorted(obs[i].names): 1420 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1421 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1422 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1423 1424 if kwargs.get('all_configs'): 1425 new_weight = weight 1426 else: 1427 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1428 1429 result.append(tmp_obs / new_weight) 1430 result[-1].reweighted = True 1431 1432 return result 1433 1434 1435def correlate(obs_a, obs_b): 1436 """Correlate two observables. 1437 1438 Parameters 1439 ---------- 1440 obs_a : Obs 1441 First observable 1442 obs_b : Obs 1443 Second observable 1444 1445 Notes 1446 ----- 1447 Keep in mind to only correlate primary observables which have not been reweighted 1448 yet. The reweighting has to be applied after correlating the observables. 1449 Only works if a single ensemble is present in the Obs. 1450 Currently only works if ensemble content is identical (this is not strictly necessary). 
1451 """ 1452 1453 if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1: 1454 raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.') 1455 if sorted(obs_a.names) != sorted(obs_b.names): 1456 raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1457 if len(obs_a.cov_names) or len(obs_b.cov_names): 1458 raise ValueError('Error: Not possible to correlate Obs that contain covobs!') 1459 for name in obs_a.names: 1460 if obs_a.shape[name] != obs_b.shape[name]: 1461 raise ValueError('Shapes of ensemble', name, 'do not fit') 1462 if obs_a.idl[name] != obs_b.idl[name]: 1463 raise ValueError('idl of ensemble', name, 'do not fit') 1464 1465 if obs_a.reweighted is True: 1466 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1467 if obs_b.reweighted is True: 1468 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1469 1470 new_samples = [] 1471 new_idl = [] 1472 for name in sorted(obs_a.names): 1473 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1474 new_idl.append(obs_a.idl[name]) 1475 1476 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1477 o.reweighted = obs_a.reweighted or obs_b.reweighted 1478 return o 1479 1480 1481def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1482 r'''Calculates the error covariance matrix of a set of observables. 1483 1484 WARNING: This function should be used with care, especially for observables with support on multiple 1485 ensembles with differing autocorrelations. See the notes below for details. 1486 1487 The gamma method has to be applied first to all observables. 1488 1489 Parameters 1490 ---------- 1491 obs : list or numpy.ndarray 1492 List or one dimensional array of Obs 1493 visualize : bool 1494 If True plots the corresponding normalized correlation matrix (default False). 1495 correlation : bool 1496 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1497 smooth : None or int 1498 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1499 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1500 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1501 small ones. 1502 1503 Notes 1504 ----- 1505 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1506 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1507 in the absence of autocorrelation. 1508 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1509 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 
def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs):
    r'''Calculates the error covariance matrix of a set of observables.

    WARNING: This function should be used with care, especially for observables with support on multiple
    ensembles with differing autocorrelations. See the notes below for details.

    The gamma method has to be applied first to all observables.

    Parameters
    ----------
    obs : list or numpy.ndarray
        List or one dimensional array of Obs
    visualize : bool
        If True plots the corresponding normalized correlation matrix (default False).
    correlation : bool
        If True the correlation matrix instead of the error covariance matrix is returned (default False).
    smooth : None or int
        If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue
        smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the
        largest E eigenvalues essentially unchanged and smooths the smaller eigenvalues to avoid extremely
        small ones.

    Notes
    -----
    The error covariance is defined such that it agrees with the squared standard error for two identical observables
    $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
    in the absence of autocorrelation.
    The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
    $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big(\sum_{i}v_i\delta_i^s\Big)^2\geq 0\,,$$
    for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
    For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements,
    $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}.$$
    This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
    '''

    length = len(obs)

    max_samples = np.max([o.N for o in obs])
    if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]:
        warnings.warn(f"The dimension of the covariance matrix ({length}) is larger than or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning)

    cov = np.zeros((length, length))
    for i in range(length):
        for j in range(i, length):
            cov[i, j] = _covariance_element(obs[i], obs[j])
    cov = cov + cov.T - np.diag(np.diag(cov))

    corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov)))

    if isinstance(smooth, int):
        corr = _smooth_eigenvalues(corr, smooth)

    if visualize:
        plt.matshow(corr, vmin=-1, vmax=1)
        plt.set_cmap('RdBu')
        plt.colorbar()
        plt.draw()

    if correlation is True:
        return corr

    errors = [o.dvalue for o in obs]
    cov = np.diag(errors) @ corr @ np.diag(errors)

    eigenvalues = np.linalg.eigh(cov)[0]
    if not np.all(eigenvalues >= 0):
        warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning)

    return cov
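A sketch of how covariance is typically called, on synthetic, partially correlated data invented for illustration; note that gamma_method has to be run on every input first:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(3)
common = rng.normal(0.0, 1.0, 1000)  # shared fluctuations induce a correlation
obs1 = pe.Obs([common + rng.normal(0.0, 0.5, 1000)], ['ensA'])
obs2 = pe.Obs([common + rng.normal(0.0, 0.5, 1000)], ['ensA'])
for o in (obs1, obs2):
    o.gamma_method()

cov = pe.obs.covariance([obs1, obs2])                     # 2x2 error covariance matrix
corr = pe.obs.covariance([obs1, obs2], correlation=True)  # normalized correlation matrix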
def invert_corr_cov_cholesky(corr, inverrdiag):
    """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr`
    and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`.

    Parameters
    ----------
    corr : np.ndarray
        correlation matrix
    inverrdiag : np.ndarray
        diagonal matrix, the entries are the inverse errors of the data points considered
    """

    condn = np.linalg.cond(corr)
    if condn > 0.1 / np.finfo(float).eps:
        raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
    if condn > 1e13:
        warnings.warn("Correlation matrix may be ill-conditioned, condition number: %1.2e" % (condn), RuntimeWarning)
    chol = np.linalg.cholesky(corr)
    chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True)

    return chol_inv
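A sketch illustrating the relation between the return value of invert_corr_cov_cholesky and the inverse covariance matrix; the numbers are invented for illustration:

import numpy as np
import pyerrors as pe

corr = np.array([[1.0, 0.3], [0.3, 1.0]])
err = np.array([0.1, 0.2])
inverrdiag = np.diag(1.0 / err)

chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag)

# chol_inv.T @ chol_inv reproduces the inverse of cov = diag(err) @ corr @ diag(err)
cov = np.diag(err) @ corr @ np.diag(err)
assert np.allclose(chol_inv.T @ chol_inv, np.linalg.inv(cov))

Working with the triangular factor instead of forming and inverting the covariance matrix directly is typically cheaper and numerically more stable.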
def sort_corr(corr, kl, yd):
    """ Reorders a correlation matrix to match the alphabetical order of its underlying y data.

    The ordering of the input correlation matrix `corr` is given by the list of keys `kl`.
    The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data
    that the correlation matrix is based on.
    This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr`
    according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds
    to the y data `yd` when arranged in an alphabetical order by its keys.

    Parameters
    ----------
    corr : np.ndarray
        A square correlation matrix constructed using the order of the y data specified by `kl`.
        The dimensions of `corr` should match the total number of y data points in `yd` combined.
    kl : list of str
        A list of keys that denotes the order in which the y data from `yd` was used to build the
        input correlation matrix `corr`.
    yd : dict of list
        A dictionary where each key corresponds to a unique identifier, and its value is a list of
        y data points. The total number of y data points across all keys must match the dimensions
        of `corr`. The lists in the dictionary can be lists of Obs.

    Returns
    -------
    np.ndarray
        A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys.

    Example
    -------
    >>> import numpy as np
    >>> import pyerrors as pe
    >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
    >>> kl = ['b', 'a']
    >>> yd = {'a': [1, 2], 'b': [3]}
    >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
    >>> sorted_corr
    array([[1. , 0.4, 0.2],
           [0.4, 1. , 0.3],
           [0.2, 0.3, 1. ]])

    """
    kl_sorted = sorted(kl)

    posd = {}
    ofs = 0
    for ki, k in enumerate(kl):
        posd[k] = [i + ofs for i in range(len(yd[k]))]
        ofs += len(posd[k])

    mapping = []
    for k in kl_sorted:
        for i in range(len(yd[k])):
            mapping.append(posd[k][i])

    corr_sorted = np.zeros_like(corr)
    for i in range(corr.shape[0]):
        for j in range(corr.shape[0]):
            corr_sorted[i][j] = corr[mapping[i]][mapping[j]]

    return corr_sorted


def _smooth_eigenvalues(corr, E):
    """Eigenvalue smoothing as described in hep-lat/9412087

    corr : np.ndarray
        correlation matrix
    E : integer
        Number of eigenvalues to be left substantially unchanged
    """
    if not (2 < E < corr.shape[0] - 1):
        raise ValueError(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).")
    vals, vec = np.linalg.eigh(corr)
    lambda_min = np.mean(vals[:-E])
    vals[vals < lambda_min] = lambda_min
    vals /= np.mean(vals)
    return vec @ np.diag(vals) @ vec.T


def _covariance_element(obs1, obs2):
    """Estimates the covariance of two Obs objects, neglecting autocorrelations."""

    def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx):
        deltas1 = _reduce_deltas(deltas1, idx1, new_idx)
        deltas2 = _reduce_deltas(deltas2, idx2, new_idx)
        return np.sum(deltas1 * deltas2)

    if set(obs1.names).isdisjoint(set(obs2.names)):
        return 0.0

    if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'):
        raise Exception('The gamma method has to be applied to both Obs first.')

    dvalue = 0.0

    for e_name in obs1.mc_names:

        if e_name not in obs2.mc_names:
            continue

        idl_d = {}
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]])

        gamma = 0.0

        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name])

        if gamma == 0.0:
            continue

        gamma_div = 0.0
        for r_name in obs1.e_content[e_name]:
            if r_name not in obs2.e_content[e_name]:
                continue
            if len(idl_d[r_name]) == 0:
                continue
            gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name]))
        gamma /= gamma_div

        dvalue += gamma

    for e_name in obs1.cov_names:

        if e_name not in obs2.cov_names:
            continue

        dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item()

    return dvalue


def import_jackknife(jacks, name, idl=None):
    """Imports jackknife samples and returns an Obs

    Parameters
    ----------
    jacks : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N jackknife samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    idl : list, optional
        list containing the range or list on which the samples are defined.
    """
    length = len(jacks) - 1
    prj = (np.ones((length, length)) - (length - 1) * np.identity(length))
    samples = jacks[1:] @ prj
    mean = np.mean(samples)
    new_obs = Obs([samples - mean], [name], idl=idl, means=[mean])
    new_obs._value = jacks[0]
    return new_obs


def import_bootstrap(boots, name, random_numbers):
    """Imports bootstrap samples and returns an Obs

    Parameters
    ----------
    boots : numpy.ndarray
        numpy array containing the mean value as zeroth entry and
        the N bootstrap samples as first to Nth entry.
    name : str
        name of the ensemble the samples are defined on.
    random_numbers : np.ndarray
        Array of shape (samples, length) containing the random numbers to generate the bootstrap samples,
        where samples is the number of bootstrap samples and length is the length of the original Monte Carlo
        chain to be reconstructed.
    """
    samples, length = random_numbers.shape
    if samples != len(boots) - 1:
        raise ValueError("Random numbers do not have the correct shape.")

    if samples < length:
        raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.")

    proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length

    samples = scipy.linalg.lstsq(proj, boots[1:])[0]
    ret = Obs([samples], [name])
    ret._value = boots[0]
    return ret
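A round-trip sketch for import_jackknife, using the export_jackknife method of Obs documented above; the ensemble name and sample data are invented for illustration:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(5)
obs = pe.Obs([rng.normal(1.0, 0.2, 500)], ['ensA'])

jacks = obs.export_jackknife()                # [mean, jackknife sample 1, ..., jackknife sample N]
rec = pe.obs.import_jackknife(jacks, 'ensA')  # reconstructs the original Monte Carlo samples
assert np.isclose(obs.value, rec.value)

The projection matrix prj inverts the jackknife averaging exactly, so the round trip reproduces the original samples up to floating-point rounding.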
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable.
    This allows one to merge Obs that have been computed on multiple replica
    of the same ensemble.
    If you would like to merge Obs that are based on several ensembles, please
    average them yourself.

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine obs which are based on the same replicum
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise ValueError('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.reweighted = any(oi.reweighted for oi in list_of_obs)
    return o


def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise ValueError('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol


def _determine_gap(o, e_content, e_name):
    gaps = []
    for r_name in e_content[e_name]:
        if isinstance(o.idl[r_name], range):
            gaps.append(o.idl[r_name].step)
        else:
            gaps.append(np.min(np.diff(o.idl[r_name])))

    gap = min(gaps)
    if not np.all([gi % gap == 0 for gi in gaps]):
        raise ValueError(f"Replica for ensemble {e_name} do not have a common spacing.", gaps)

    return gap


def _check_lists_equal(idl):
    '''
    Use groupby to efficiently check whether all elements of idl are identical.
    Returns True if all elements are equal, otherwise False.

    Parameters
    ----------
    idl : list of lists, ranges or np.ndarrays
    '''
    g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl])
    if next(g, True) and not next(g, False):
        return True
    return False
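A sketch for merge_obs, combining two replica of the same ensemble; the replica names (using the 'ensemble|replicum' convention) and the sample data are invented for illustration:

import numpy as np
import pyerrors as pe

rng = np.random.default_rng(2)
rep1 = pe.Obs([rng.normal(0.7, 0.1, 500)], ['ensA|r01'])
rep2 = pe.Obs([rng.normal(0.7, 0.1, 700)], ['ensA|r02'])

merged = pe.obs.merge_obs([rep1, rep2])
merged.gamma_method()
print(merged.N)  # 1200 configurations in total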
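A sketch for cov_Obs; the identifiers and numbers are invented for illustration:

import pyerrors as pe

# Single external input with value 0.986 and error 0.041 (0d covariance):
za = pe.obs.cov_Obs(0.986, 0.041 ** 2, 'Z_A')
print(za)  # value with its error

# Two correlated inputs from a 2x2 covariance matrix (returns a list of Obs):
c1, c2 = pe.obs.cov_Obs([0.80, 1.05], [[4e-4, 1e-4], [1e-4, 9e-4]], 'matching')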
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
61 def __init__(self, samples, names, idl=None, **kwargs): 62 """ Initialize Obs object. 63 64 Parameters 65 ---------- 66 samples : list 67 list of numpy arrays containing the Monte Carlo samples 68 names : list 69 list of strings labeling the individual samples 70 idl : list, optional 71 list of ranges or lists on which the samples are defined 72 """ 73 74 if kwargs.get("means") is None and len(samples): 75 if len(samples) != len(names): 76 raise ValueError('Length of samples and names incompatible.') 77 if idl is not None: 78 if len(idl) != len(names): 79 raise ValueError('Length of idl incompatible with samples and names.') 80 name_length = len(names) 81 if name_length > 1: 82 if name_length != len(set(names)): 83 raise ValueError('Names are not unique.') 84 if not all(isinstance(x, str) for x in names): 85 raise TypeError('All names have to be strings.') 86 if len(set([o.split('|')[0] for o in names])) > 1: 87 raise ValueError('Cannot initialize Obs based on multiple ensembles. Please average separate Obs from each ensemble.') 88 else: 89 if not isinstance(names[0], str): 90 raise TypeError('All names have to be strings.') 91 if min(len(x) for x in samples) <= 4: 92 raise ValueError('Samples have to have at least 5 entries.') 93 94 self.names = sorted(names) 95 self.shape = {} 96 self.r_values = {} 97 self.deltas = {} 98 self._covobs = {} 99 100 self._value = 0 101 self.N = 0 102 self.idl = {} 103 if idl is not None: 104 for name, idx in sorted(zip(names, idl)): 105 if isinstance(idx, range): 106 self.idl[name] = idx 107 elif isinstance(idx, (list, np.ndarray)): 108 dc = np.unique(np.diff(idx)) 109 if np.any(dc < 0): 110 raise ValueError("Unsorted idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) < 0)[0]]))) 111 elif np.any(dc == 0): 112 raise ValueError("Duplicate entries in idx for idl[%s] at position %s" % (name, ' '.join(['%s' % (pos + 1) for pos in np.where(np.diff(idx) == 0)[0]]))) 113 if len(dc) == 1: 114 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 115 else: 116 self.idl[name] = list(idx) 117 else: 118 raise TypeError('incompatible type for idl[%s].' % (name)) 119 else: 120 for name, sample in sorted(zip(names, samples)): 121 self.idl[name] = range(1, len(sample) + 1) 122 123 if kwargs.get("means") is not None: 124 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 125 self.shape[name] = len(self.idl[name]) 126 self.N += self.shape[name] 127 self.r_values[name] = mean 128 self.deltas[name] = sample 129 else: 130 for name, sample in sorted(zip(names, samples)): 131 self.shape[name] = len(self.idl[name]) 132 self.N += self.shape[name] 133 if len(sample) != self.shape[name]: 134 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 135 self.r_values[name] = np.mean(sample) 136 self.deltas[name] = sample - self.r_values[name] 137 self._value += self.shape[name] * self.r_values[name] 138 self._value /= self.N 139 140 self._dvalue = 0.0 141 self.ddvalue = 0.0 142 self.reweighted = False 143 144 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
179 def gamma_method(self, **kwargs): 180 """Estimate the error and related properties of the Obs. 181 182 Parameters 183 ---------- 184 S : float 185 specifies a custom value for the parameter S (default 2.0). 186 If set to 0 it is assumed that the data exhibits no 187 autocorrelation. In this case the error estimates coincides 188 with the sample standard error. 189 tau_exp : float 190 positive value triggers the critical slowing down analysis 191 (default 0.0). 192 N_sigma : float 193 number of standard deviations from zero until the tail is 194 attached to the autocorrelation function (default 1). 195 fft : bool 196 determines whether the fft algorithm is used for the computation 197 of the autocorrelation function (default True) 198 """ 199 200 e_content = self.e_content 201 self.e_dvalue = {} 202 self.e_ddvalue = {} 203 self.e_tauint = {} 204 self.e_dtauint = {} 205 self.e_windowsize = {} 206 self.e_n_tauint = {} 207 self.e_n_dtauint = {} 208 e_gamma = {} 209 self.e_rho = {} 210 self.e_drho = {} 211 self._dvalue = 0 212 self.ddvalue = 0 213 214 self.S = {} 215 self.tau_exp = {} 216 self.N_sigma = {} 217 218 if kwargs.get('fft') is False: 219 fft = False 220 else: 221 fft = True 222 223 def _parse_kwarg(kwarg_name): 224 if kwarg_name in kwargs: 225 tmp = kwargs.get(kwarg_name) 226 if isinstance(tmp, (int, float)): 227 if tmp < 0: 228 raise ValueError(kwarg_name + ' has to be larger or equal to 0.') 229 for e, e_name in enumerate(self.e_names): 230 getattr(self, kwarg_name)[e_name] = tmp 231 else: 232 raise TypeError(kwarg_name + ' is not in proper format.') 233 else: 234 for e, e_name in enumerate(self.e_names): 235 if e_name in getattr(Obs, kwarg_name + '_dict'): 236 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 237 else: 238 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 239 240 _parse_kwarg('S') 241 _parse_kwarg('tau_exp') 242 _parse_kwarg('N_sigma') 243 244 for e, e_name in enumerate(self.mc_names): 245 gapsize = _determine_gap(self, e_content, e_name) 246 247 r_length = [] 248 for r_name in e_content[e_name]: 249 if isinstance(self.idl[r_name], range): 250 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 251 else: 252 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 253 254 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 255 w_max = max(r_length) // 2 256 e_gamma[e_name] = np.zeros(w_max) 257 self.e_rho[e_name] = np.zeros(w_max) 258 self.e_drho[e_name] = np.zeros(w_max) 259 260 for r_name in e_content[e_name]: 261 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 262 263 gamma_div = np.zeros(w_max) 264 for r_name in e_content[e_name]: 265 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 266 gamma_div[gamma_div < 1] = 1.0 267 e_gamma[e_name] /= gamma_div[:w_max] 268 269 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 270 self.e_tauint[e_name] = 0.5 271 self.e_dtauint[e_name] = 0.0 272 self.e_dvalue[e_name] = 0.0 273 self.e_ddvalue[e_name] = 0.0 274 self.e_windowsize[e_name] = 0 275 continue 276 277 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 278 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 279 # Make sure no entry of tauint is smaller than 0.5 280 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + 
np.finfo(np.float64).eps 281 # hep-lat/0306017 eq. (42) 282 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 283 self.e_n_dtauint[e_name][0] = 0.0 284 285 def _compute_drho(i): 286 tmp = (self.e_rho[e_name][i + 1:w_max] 287 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 288 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 289 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 290 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 291 292 if self.tau_exp[e_name] > 0: 293 _compute_drho(1) 294 texp = self.tau_exp[e_name] 295 # Critical slowing down analysis 296 if w_max // 2 <= 1: 297 raise ValueError("Need at least 8 samples for tau_exp error analysis") 298 for n in range(1, w_max // 2): 299 _compute_drho(n + 1) 300 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 301 # Bias correction hep-lat/0306017 eq. (49) included 302 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 303 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 304 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 305 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 306 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 307 self.e_windowsize[e_name] = n 308 break 309 else: 310 if self.S[e_name] == 0.0: 311 self.e_tauint[e_name] = 0.5 312 self.e_dtauint[e_name] = 0.0 313 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 314 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 315 self.e_windowsize[e_name] = 0 316 else: 317 # Standard automatic windowing procedure 318 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 319 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 320 for n in range(1, w_max): 321 if g_w[n - 1] < 0 or n >= w_max - 1: 322 _compute_drho(n) 323 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 324 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 325 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 326 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 327 self.e_windowsize[e_name] = n 328 break 329 330 self._dvalue += self.e_dvalue[e_name] ** 2 331 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 332 333 for e_name in self.cov_names: 334 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 335 self.e_ddvalue[e_name] = 0 336 self._dvalue += self.e_dvalue[e_name]**2 337 338 self._dvalue = np.sqrt(self._dvalue) 339 if self._dvalue == 0.0: 340 self.ddvalue = 0.0 341 else: 342 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 343 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimate coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True).
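A usage sketch with toy data (the ensemble name 'ens1' and the moving-average chain are made up; the package is assumed to be importable as pyerrors):

>>> import numpy as np
>>> import pyerrors as pe
>>> # Toy Monte Carlo chain with autocorrelation from a moving average
>>> rng = np.random.default_rng(7)
>>> samples = np.convolve(rng.normal(size=1009), np.ones(10) / 10, mode='valid')
>>> obs = pe.Obs([samples], ['ens1'])
>>> obs.gamma_method()            # automatic windowing with the default S=2.0
>>> obs.gamma_method(S=3.0)       # larger S leads to a more conservative window
>>> obs.gamma_method(tau_exp=10)  # attach an exponential tail to the autocorrelation function
>>> print(obs.value, obs.dvalue)  # mean value and error estimate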
383 def details(self, ens_content=True): 384 """Output detailed properties of the Obs. 385 386 Parameters 387 ---------- 388 ens_content : bool 389 print details about the ensembles and replica if true. 390 """ 391 if self.tag is not None: 392 print("Description:", self.tag) 393 if not hasattr(self, 'e_dvalue'): 394 print('Result\t %3.8e' % (self.value)) 395 else: 396 if self.value == 0.0: 397 percentage = np.nan 398 else: 399 percentage = np.abs(self._dvalue / self.value) * 100 400 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 401 if len(self.e_names) > 1: 402 print(' Ensemble errors:') 403 e_content = self.e_content 404 for e_name in self.mc_names: 405 gap = _determine_gap(self, e_content, e_name) 406 407 if len(self.e_names) > 1: 408 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 409 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 410 tau_string += f" in units of {gap} config" 411 if gap > 1: 412 tau_string += "s" 413 if self.tau_exp[e_name] > 0: 414 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 415 else: 416 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 417 print(tau_string) 418 for e_name in self.cov_names: 419 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 420 if ens_content is True: 421 if len(self.e_names) == 1: 422 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 423 else: 424 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 425 my_string_list = [] 426 for key, value in sorted(self.e_content.items()): 427 if key not in self.covobs: 428 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 429 if len(value) == 1: 430 my_string += f': {self.shape[value[0]]} configurations' 431 if isinstance(self.idl[value[0]], range): 432 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 433 else: 434 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 435 else: 436 sublist = [] 437 for v in value: 438 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 439 my_substring += f': {self.shape[v]} configurations' 440 if isinstance(self.idl[v], range): 441 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 442 else: 443 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 444 sublist.append(my_substring) 445 446 my_string += '\n' + '\n'.join(sublist) 447 else: 448 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 449 my_string_list.append(my_string) 450 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
452 def reweight(self, weight): 453 """Reweight the obs with given rewighting factors. 454 455 Parameters 456 ---------- 457 weight : Obs 458 Reweighting factor. An Observable that has to be defined on a superset of the 459 configurations in obs[i].idl for all i. 460 all_configs : bool 461 if True, the reweighted observables are normalized by the average of 462 the reweighting factor on all configurations in weight.idl and not 463 on the configurations in obs[i].idl. Default False. 464 """ 465 return reweight(weight, [self])[0]
Reweight the obs with given reweighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
467 def is_zero_within_error(self, sigma=1): 468 """Checks whether the observable is zero within 'sigma' standard errors. 469 470 Parameters 471 ---------- 472 sigma : int 473 Number of standard errors used for the check. 474 475 Works only properly when the gamma method was run. 476 """ 477 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
Works only properly when the gamma method was run.
479 def is_zero(self, atol=1e-10): 480 """Checks whether the observable is zero within a given tolerance. 481 482 Parameters 483 ---------- 484 atol : float 485 Absolute tolerance (for details see numpy documentation). 486 """ 487 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
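A short sketch of both checks on toy data; is_zero_within_error relies on the error estimate, so the gamma method has to be run first:

>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.default_rng(0).normal(0.0, 1.0, 500)], ['ens1'])
>>> obs.gamma_method()
>>> obs.is_zero_within_error()         # True if |value| <= 1 standard error
>>> obs.is_zero_within_error(sigma=3)  # widen the check to 3 standard errors
>>> obs.is_zero()                      # exact zero check, False for random data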
489 def plot_tauint(self, save=None): 490 """Plot integrated autocorrelation time for each ensemble. 491 492 Parameters 493 ---------- 494 save : str 495 saves the figure to a file named 'save' if. 496 """ 497 if not hasattr(self, 'e_dvalue'): 498 raise Exception('Run the gamma method first.') 499 500 for e, e_name in enumerate(self.mc_names): 501 fig = plt.figure() 502 plt.xlabel(r'$W$') 503 plt.ylabel(r'$\tau_\mathrm{int}$') 504 length = int(len(self.e_n_tauint[e_name])) 505 if self.tau_exp[e_name] > 0: 506 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 507 x_help = np.arange(2 * self.tau_exp[e_name]) 508 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 509 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 510 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 511 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 512 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 513 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 514 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 515 else: 516 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 517 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 518 519 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 520 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 521 plt.legend() 522 plt.xlim(-0.5, xmax) 523 ylim = plt.ylim() 524 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 525 plt.draw() 526 if save: 527 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if set.
529 def plot_rho(self, save=None): 530 """Plot normalized autocorrelation function time for each ensemble. 531 532 Parameters 533 ---------- 534 save : str 535 saves the figure to a file named 'save' if. 536 """ 537 if not hasattr(self, 'e_dvalue'): 538 raise Exception('Run the gamma method first.') 539 for e, e_name in enumerate(self.mc_names): 540 fig = plt.figure() 541 plt.xlabel('W') 542 plt.ylabel('rho') 543 length = int(len(self.e_drho[e_name])) 544 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 545 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 546 if self.tau_exp[e_name] > 0: 547 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 548 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 549 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 550 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 551 else: 552 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 553 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 554 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 555 plt.xlim(-0.5, xmax) 556 plt.draw() 557 if save: 558 fig.savefig(save + "_" + str(e))
Plot the normalized autocorrelation function for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if set.
560 def plot_rep_dist(self): 561 """Plot replica distribution for each ensemble with more than one replicum.""" 562 if not hasattr(self, 'e_dvalue'): 563 raise Exception('Run the gamma method first.') 564 for e, e_name in enumerate(self.mc_names): 565 if len(self.e_content[e_name]) == 1: 566 print('No replica distribution for a single replicum (', e_name, ')') 567 continue 568 r_length = [] 569 sub_r_mean = 0 570 for r, r_name in enumerate(self.e_content[e_name]): 571 r_length.append(len(self.deltas[r_name])) 572 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 573 e_N = np.sum(r_length) 574 sub_r_mean /= e_N 575 arr = np.zeros(len(self.e_content[e_name])) 576 for r, r_name in enumerate(self.e_content[e_name]): 577 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 578 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 579 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 580 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
582 def plot_history(self, expand=True): 583 """Plot derived Monte Carlo history for each ensemble 584 585 Parameters 586 ---------- 587 expand : bool 588 show expanded history for irregular Monte Carlo chains (default: True). 589 """ 590 for e, e_name in enumerate(self.mc_names): 591 plt.figure() 592 r_length = [] 593 tmp = [] 594 tmp_expanded = [] 595 for r, r_name in enumerate(self.e_content[e_name]): 596 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 597 if expand: 598 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 599 r_length.append(len(tmp_expanded[-1])) 600 else: 601 r_length.append(len(tmp[-1])) 602 e_N = np.sum(r_length) 603 x = np.arange(e_N) 604 y_test = np.concatenate(tmp, axis=0) 605 if expand: 606 y = np.concatenate(tmp_expanded, axis=0) 607 else: 608 y = y_test 609 plt.errorbar(x, y, fmt='.', markersize=3) 610 plt.xlim(-0.5, e_N - 0.5) 611 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 612 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
614 def plot_piechart(self, save=None): 615 """Plot piechart which shows the fractional contribution of each 616 ensemble to the error and returns a dictionary containing the fractions. 617 618 Parameters 619 ---------- 620 save : str 621 saves the figure to a file named 'save' if. 622 """ 623 if not hasattr(self, 'e_dvalue'): 624 raise Exception('Run the gamma method first.') 625 if np.isclose(0.0, self._dvalue, atol=1e-15): 626 raise ValueError('Error is 0.0') 627 labels = self.e_names 628 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 629 fig1, ax1 = plt.subplots() 630 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 631 ax1.axis('equal') 632 plt.draw() 633 if save: 634 fig1.savefig(save) 635 636 return dict(zip(labels, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if set.
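The plotting helpers share a common pattern; a combined sketch with toy data (the file name below is a placeholder):

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(3)
>>> samples = np.convolve(rng.normal(size=2019), np.ones(20) / 20, mode='valid')
>>> obs = pe.Obs([samples], ['ens1'])
>>> obs.gamma_method()
>>> obs.plot_tauint()    # integrated autocorrelation time vs. window size
>>> obs.plot_rho()       # normalized autocorrelation function
>>> obs.plot_history()   # Monte Carlo history with skewness/kurtosis tests
>>> fractions = obs.plot_piechart(save='error_budget.png')  # also returns the fractions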
638 def dump(self, filename, datatype="json.gz", description="", **kwargs): 639 """Dump the Obs to a file 'name' of chosen format. 640 641 Parameters 642 ---------- 643 filename : str 644 name of the file to be saved. 645 datatype : str 646 Format of the exported file. Supported formats include 647 "json.gz" and "pickle" 648 description : str 649 Description for output file, only relevant for json.gz format. 650 path : str 651 specifies a custom path for the file (default '.') 652 """ 653 if 'path' in kwargs: 654 file_name = kwargs.get('path') + '/' + filename 655 else: 656 file_name = filename 657 658 if datatype == "json.gz": 659 from .input.json import dump_to_json 660 dump_to_json([self], file_name, description=description) 661 elif datatype == "pickle": 662 with open(file_name + '.p', 'wb') as fb: 663 pickle.dump(self, fb) 664 else: 665 raise TypeError("Unknown datatype " + str(datatype))
Dump the Obs to a file 'filename' in the chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle".
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
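A sketch of both output formats (file names are placeholders):

>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.default_rng(5).normal(1.0, 0.1, 500)], ['ens1'])
>>> obs.dump('my_obs')                     # json.gz, the default format
>>> obs.dump('my_obs', datatype='pickle')  # writes my_obs.p instead
>>> obs.dump('my_obs', description='toy data', path='.')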
667 def export_jackknife(self): 668 """Export jackknife samples from the Obs 669 670 Returns 671 ------- 672 numpy.ndarray 673 Returns a numpy array of length N + 1 where N is the number of samples 674 for the given ensemble and replicum. The zeroth entry of the array contains 675 the mean value of the Obs, entries 1 to N contain the N jackknife samples 676 derived from the Obs. The current implementation only works for observables 677 defined on exactly one ensemble and replicum. The derived jackknife samples 678 should agree with samples from a full jackknife analysis up to O(1/N). 679 """ 680 681 if len(self.names) != 1: 682 raise ValueError("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 683 684 name = self.names[0] 685 full_data = self.deltas[name] + self.r_values[name] 686 n = full_data.size 687 mean = self.value 688 tmp_jacks = np.zeros(n + 1) 689 tmp_jacks[0] = mean 690 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 691 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
693 def export_bootstrap(self, samples=500, random_numbers=None, save_rng=None): 694 """Export bootstrap samples from the Obs 695 696 Parameters 697 ---------- 698 samples : int 699 Number of bootstrap samples to generate. 700 random_numbers : np.ndarray 701 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. 702 If not provided the bootstrap samples are generated bashed on the md5 hash of the enesmble name. 703 save_rng : str 704 Save the random numbers to a file if a path is specified. 705 706 Returns 707 ------- 708 numpy.ndarray 709 Returns a numpy array of length N + 1 where N is the number of samples 710 for the given ensemble and replicum. The zeroth entry of the array contains 711 the mean value of the Obs, entries 1 to N contain the N import_bootstrap samples 712 derived from the Obs. The current implementation only works for observables 713 defined on exactly one ensemble and replicum. The derived bootstrap samples 714 should agree with samples from a full bootstrap analysis up to O(1/N). 715 """ 716 if len(self.names) != 1: 717 raise ValueError("'export_boostrap' is only implemented for Obs defined on one ensemble and replicum.") 718 719 name = self.names[0] 720 length = self.N 721 722 if random_numbers is None: 723 seed = int(hashlib.md5(name.encode()).hexdigest(), 16) & 0xFFFFFFFF 724 rng = np.random.default_rng(seed) 725 random_numbers = rng.integers(0, length, size=(samples, length)) 726 727 if save_rng is not None: 728 np.savetxt(save_rng, random_numbers, fmt='%i') 729 730 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 731 ret = np.zeros(samples + 1) 732 ret[0] = self.value 733 ret[1:] = proj @ (self.deltas[name] + self.r_values[name]) 734 return ret
Export bootstrap samples from the Obs
Parameters
- samples (int): Number of bootstrap samples to generate.
- random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples. If not provided the bootstrap samples are generated based on the md5 hash of the ensemble name.
- save_rng (str): Save the random numbers to a file if a path is specified.
Returns
- numpy.ndarray: Returns a numpy array of length samples + 1. The zeroth entry of the array contains the mean value of the Obs, entries 1 to samples contain the bootstrap samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived bootstrap samples should agree with samples from a full bootstrap analysis up to O(1/N).
919class CObs: 920 """Class for a complex valued observable.""" 921 __slots__ = ['_real', '_imag', 'tag'] 922 923 def __init__(self, real, imag=0.0): 924 self._real = real 925 self._imag = imag 926 self.tag = None 927 928 @property 929 def real(self): 930 return self._real 931 932 @property 933 def imag(self): 934 return self._imag 935 936 def gamma_method(self, **kwargs): 937 """Executes the gamma_method for the real and the imaginary part.""" 938 if isinstance(self.real, Obs): 939 self.real.gamma_method(**kwargs) 940 if isinstance(self.imag, Obs): 941 self.imag.gamma_method(**kwargs) 942 943 def is_zero(self): 944 """Checks whether both real and imaginary part are zero within machine precision.""" 945 return self.real == 0.0 and self.imag == 0.0 946 947 def conjugate(self): 948 return CObs(self.real, -self.imag) 949 950 def __add__(self, other): 951 if isinstance(other, np.ndarray): 952 return other + self 953 elif hasattr(other, 'real') and hasattr(other, 'imag'): 954 return CObs(self.real + other.real, 955 self.imag + other.imag) 956 else: 957 return CObs(self.real + other, self.imag) 958 959 def __radd__(self, y): 960 return self + y 961 962 def __sub__(self, other): 963 if isinstance(other, np.ndarray): 964 return -1 * (other - self) 965 elif hasattr(other, 'real') and hasattr(other, 'imag'): 966 return CObs(self.real - other.real, self.imag - other.imag) 967 else: 968 return CObs(self.real - other, self.imag) 969 970 def __rsub__(self, other): 971 return -1 * (self - other) 972 973 def __mul__(self, other): 974 if isinstance(other, np.ndarray): 975 return other * self 976 elif hasattr(other, 'real') and hasattr(other, 'imag'): 977 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 978 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 979 [self.real, other.real, self.imag, other.imag], 980 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 981 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 982 [self.real, other.real, self.imag, other.imag], 983 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 984 elif getattr(other, 'imag', 0) != 0: 985 return CObs(self.real * other.real - self.imag * other.imag, 986 self.imag * other.real + self.real * other.imag) 987 else: 988 return CObs(self.real * other.real, self.imag * other.real) 989 else: 990 return CObs(self.real * other, self.imag * other) 991 992 def __rmul__(self, other): 993 return self * other 994 995 def __truediv__(self, other): 996 if isinstance(other, np.ndarray): 997 return 1 / (other / self) 998 elif hasattr(other, 'real') and hasattr(other, 'imag'): 999 r = other.real ** 2 + other.imag ** 2 1000 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 1001 else: 1002 return CObs(self.real / other, self.imag / other) 1003 1004 def __rtruediv__(self, other): 1005 r = self.real ** 2 + self.imag ** 2 1006 if hasattr(other, 'real') and hasattr(other, 'imag'): 1007 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 1008 else: 1009 return CObs(self.real * other / r, -self.imag * other / r) 1010 1011 def __abs__(self): 1012 return np.sqrt(self.real**2 + self.imag**2) 1013 1014 def __pos__(self): 1015 return self 1016 1017 def __neg__(self): 1018 return -1 * self 1019 1020 def __eq__(self, other): 1021 return self.real == other.real and self.imag == other.imag 
1022 1023 def __str__(self): 1024 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 1025 1026 def __repr__(self): 1027 return 'CObs[' + str(self) + ']' 1028 1029 def __format__(self, format_type): 1030 if format_type == "": 1031 significance = 2 1032 format_type = "2" 1033 else: 1034 significance = int(float(format_type.replace("+", "").replace("-", ""))) 1035 return f"({self.real:{format_type}}{self.imag:+{significance}}j)"
Class for a complex valued observable.
936 def gamma_method(self, **kwargs): 937 """Executes the gamma_method for the real and the imaginary part.""" 938 if isinstance(self.real, Obs): 939 self.real.gamma_method(**kwargs) 940 if isinstance(self.imag, Obs): 941 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
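A minimal sketch of a complex observable built from two real Obs (toy data):

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(2)
>>> c = pe.CObs(pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1']),
...             pe.Obs([rng.normal(0.5, 0.1, 500)], ['ens1']))
>>> c.gamma_method()          # error analysis for real and imaginary part
>>> print(c)                  # prints as (real+imagj)
>>> print(c * c.conjugate())  # |c|^2, the imaginary part vanishes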
1038def gamma_method(x, **kwargs): 1039 """Vectorized version of the gamma_method applicable to lists or arrays of Obs. 1040 1041 See docstring of pe.Obs.gamma_method for details. 1042 """ 1043 return np.vectorize(lambda o: o.gm(**kwargs))(x)
Vectorized version of the gamma_method applicable to lists or arrays of Obs.
See docstring of pe.Obs.gamma_method for details.
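A sketch of the vectorized call on a list of Obs (the implementation maps the gm alias of Obs.gamma_method over all elements, so keyword arguments are passed through):

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(4)
>>> obs_list = [pe.Obs([rng.normal(i, 0.1, 500)], ['ens1']) for i in range(3)]
>>> pe.obs.gamma_method(obs_list, S=3.0)  # runs the error analysis on every element
>>> [o.dvalue for o in obs_list]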
1173def derived_observable(func, data, array_mode=False, **kwargs): 1174 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1175 1176 Parameters 1177 ---------- 1178 func : object 1179 arbitrary function of the form func(data, **kwargs). For the 1180 automatic differentiation to work, all numpy functions have to have 1181 the autograd wrapper (use 'import autograd.numpy as anp'). 1182 data : list 1183 list of Obs, e.g. [obs1, obs2, obs3]. 1184 num_grad : bool 1185 if True, numerical derivatives are used instead of autograd 1186 (default False). To control the numerical differentiation the 1187 kwargs of numdifftools.step_generators.MaxStepGenerator 1188 can be used. 1189 man_grad : list 1190 manually supply a list or an array which contains the jacobian 1191 of func. Use cautiously, supplying the wrong derivative will 1192 not be intercepted. 1193 1194 Notes 1195 ----- 1196 For simple mathematical operations it can be practical to use anonymous 1197 functions. For the ratio of two observables one can e.g. use 1198 1199 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1200 """ 1201 1202 data = np.asarray(data) 1203 raveled_data = data.ravel() 1204 1205 # Workaround for matrix operations containing non Obs data 1206 if not all(isinstance(x, Obs) for x in raveled_data): 1207 for i in range(len(raveled_data)): 1208 if isinstance(raveled_data[i], (int, float)): 1209 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1210 1211 allcov = {} 1212 for o in raveled_data: 1213 for name in o.cov_names: 1214 if name in allcov: 1215 if not np.allclose(allcov[name], o.covobs[name].cov): 1216 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1217 else: 1218 allcov[name] = o.covobs[name].cov 1219 1220 n_obs = len(raveled_data) 1221 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1222 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1223 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1224 1225 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1226 1227 if data.ndim == 1: 1228 values = np.array([o.value for o in data]) 1229 else: 1230 values = np.vectorize(lambda x: x.value)(data) 1231 1232 new_values = func(values, **kwargs) 1233 1234 multi = int(isinstance(new_values, np.ndarray)) 1235 1236 new_r_values = {} 1237 new_idl_d = {} 1238 for name in new_sample_names: 1239 idl = [] 1240 tmp_values = np.zeros(n_obs) 1241 for i, item in enumerate(raveled_data): 1242 tmp_values[i] = item.r_values.get(name, item.value) 1243 tmp_idl = item.idl.get(name) 1244 if tmp_idl is not None: 1245 idl.append(tmp_idl) 1246 if multi > 0: 1247 tmp_values = np.array(tmp_values).reshape(data.shape) 1248 new_r_values[name] = func(tmp_values, **kwargs) 1249 new_idl_d[name] = _merge_idx(idl) 1250 1251 def _compute_scalefactor_missing_rep(obs): 1252 """ 1253 Computes the scale factor that is to be multiplied with the deltas 1254 in the case where Obs with different subsets of replica are merged. 1255 Returns a dictionary with the scale factor for each Monte Carlo name. 
1256 1257 Parameters 1258 ---------- 1259 obs : Obs 1260 The observable corresponding to the deltas that are to be scaled 1261 """ 1262 scalef_d = {} 1263 for mc_name in obs.mc_names: 1264 mc_idl_d = [name for name in obs.idl if name.startswith(mc_name + '|')] 1265 new_mc_idl_d = [name for name in new_idl_d if name.startswith(mc_name + '|')] 1266 if len(mc_idl_d) > 0 and len(mc_idl_d) < len(new_mc_idl_d): 1267 scalef_d[mc_name] = sum([len(new_idl_d[name]) for name in new_mc_idl_d]) / sum([len(new_idl_d[name]) for name in mc_idl_d]) 1268 return scalef_d 1269 1270 if 'man_grad' in kwargs: 1271 deriv = np.asarray(kwargs.get('man_grad')) 1272 if new_values.shape + data.shape != deriv.shape: 1273 raise ValueError('Manual derivative does not have correct shape.') 1274 elif kwargs.get('num_grad') is True: 1275 if multi > 0: 1276 raise Exception('Multi mode currently not supported for numerical derivative') 1277 options = { 1278 'base_step': 0.1, 1279 'step_ratio': 2.5} 1280 for key in options.keys(): 1281 kwarg = kwargs.get(key) 1282 if kwarg is not None: 1283 options[key] = kwarg 1284 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1285 if tmp_df.size == 1: 1286 deriv = np.array([tmp_df.real]) 1287 else: 1288 deriv = tmp_df.real 1289 else: 1290 deriv = jacobian(func)(values, **kwargs) 1291 1292 final_result = np.zeros(new_values.shape, dtype=object) 1293 1294 if array_mode is True: 1295 1296 class _Zero_grad(): 1297 def __init__(self, N): 1298 self.grad = np.zeros((N, 1)) 1299 1300 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1301 d_extracted = {} 1302 g_extracted = {} 1303 for name in new_sample_names: 1304 d_extracted[name] = [] 1305 ens_length = len(new_idl_d[name]) 1306 for i_dat, dat in enumerate(data): 1307 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name], _compute_scalefactor_missing_rep(o).get(name.split('|')[0], 1)) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1308 for name in new_cov_names: 1309 g_extracted[name] = [] 1310 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1311 for i_dat, dat in enumerate(data): 1312 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1313 1314 for i_val, new_val in np.ndenumerate(new_values): 1315 new_deltas = {} 1316 new_grad = {} 1317 if array_mode is True: 1318 for name in new_sample_names: 1319 ens_length = d_extracted[name][0].shape[-1] 1320 new_deltas[name] = np.zeros(ens_length) 1321 for i_dat, dat in enumerate(d_extracted[name]): 1322 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1323 for name in new_cov_names: 1324 new_grad[name] = 0 1325 for i_dat, dat in enumerate(g_extracted[name]): 1326 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1327 else: 1328 for j_obs, obs in np.ndenumerate(data): 1329 scalef_d = _compute_scalefactor_missing_rep(obs) 1330 for name in obs.names: 1331 if name in obs.cov_names: 1332 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1333 else: 1334 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name], scalef_d.get(name.split('|')[0], 1)) 
1335 1336 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1337 1338 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1339 raise ValueError('The same name has been used for deltas and covobs!') 1340 new_samples = [] 1341 new_means = [] 1342 new_idl = [] 1343 new_names_obs = [] 1344 for name in new_names: 1345 if name not in new_covobs: 1346 new_samples.append(new_deltas[name]) 1347 new_idl.append(new_idl_d[name]) 1348 new_means.append(new_r_values[name][i_val]) 1349 new_names_obs.append(name) 1350 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1351 for name in new_covobs: 1352 final_result[i_val].names.append(name) 1353 final_result[i_val]._covobs = new_covobs 1354 final_result[i_val]._value = new_val 1355 final_result[i_val].reweighted = reweighted 1356 1357 if multi == 0: 1358 final_result = final_result.item() 1359 1360 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
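A sketch beyond the ratio above, using the autograd-wrapped numpy as required for the automatic differentiation (toy data; note that the function has to accept **kwargs since keyword arguments such as num_grad are passed through):

>>> import autograd.numpy as anp  # autograd wrapper, as required by derived_observable
>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(8)
>>> obs1 = pe.Obs([rng.normal(2.0, 0.1, 500)], ['ens1'])
>>> obs2 = pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1'])
>>> new_obs = pe.derived_observable(lambda x, **kwargs: anp.log(x[0] / x[1]), [obs1, obs2])
>>> new_obs.gamma_method()
>>> # the same quantity with numerical instead of automatic derivatives
>>> num_obs = pe.derived_observable(lambda x, **kwargs: anp.log(x[0] / x[1]), [obs1, obs2], num_grad=True)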
1392def reweight(weight, obs, **kwargs): 1393 """Reweight a list of observables. 1394 1395 Parameters 1396 ---------- 1397 weight : Obs 1398 Reweighting factor. An Observable that has to be defined on a superset of the 1399 configurations in obs[i].idl for all i. 1400 obs : list 1401 list of Obs, e.g. [obs1, obs2, obs3]. 1402 all_configs : bool 1403 if True, the reweighted observables are normalized by the average of 1404 the reweighting factor on all configurations in weight.idl and not 1405 on the configurations in obs[i].idl. Default False. 1406 """ 1407 result = [] 1408 for i in range(len(obs)): 1409 if len(obs[i].cov_names): 1410 raise ValueError('Error: Not possible to reweight an Obs that contains covobs!') 1411 if not set(obs[i].names).issubset(weight.names): 1412 raise ValueError('Error: Ensembles do not fit') 1413 if len(obs[i].mc_names) > 1 or len(weight.mc_names) > 1: 1414 raise ValueError('Error: Cannot reweight an Obs that contains multiple ensembles.') 1415 for name in obs[i].names: 1416 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1417 raise ValueError('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1418 new_samples = [] 1419 w_deltas = {} 1420 for name in sorted(obs[i].names): 1421 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1422 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1423 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1424 1425 if kwargs.get('all_configs'): 1426 new_weight = weight 1427 else: 1428 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1429 1430 result.append(tmp_obs / new_weight) 1431 result[-1].reweighted = True 1432 1433 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
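A sketch of the function form (toy data; here the reweighting factor is defined on exactly the configurations of the observable):

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(6)
>>> weight = pe.Obs([np.abs(rng.normal(1.0, 0.05, 500))], ['ens1'])  # positive factor
>>> obs = pe.Obs([rng.normal(0.5, 0.1, 500)], ['ens1'])
>>> reweighted = pe.reweight(weight, [obs])[0]
>>> reweighted.reweighted
True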
1436def correlate(obs_a, obs_b): 1437 """Correlate two observables. 1438 1439 Parameters 1440 ---------- 1441 obs_a : Obs 1442 First observable 1443 obs_b : Obs 1444 Second observable 1445 1446 Notes 1447 ----- 1448 Keep in mind to only correlate primary observables which have not been reweighted 1449 yet. The reweighting has to be applied after correlating the observables. 1450 Only works if a single ensemble is present in the Obs. 1451 Currently only works if ensemble content is identical (this is not strictly necessary). 1452 """ 1453 1454 if len(obs_a.mc_names) > 1 or len(obs_b.mc_names) > 1: 1455 raise ValueError('Error: Cannot correlate Obs that contain multiple ensembles.') 1456 if sorted(obs_a.names) != sorted(obs_b.names): 1457 raise ValueError(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1458 if len(obs_a.cov_names) or len(obs_b.cov_names): 1459 raise ValueError('Error: Not possible to correlate Obs that contain covobs!') 1460 for name in obs_a.names: 1461 if obs_a.shape[name] != obs_b.shape[name]: 1462 raise ValueError('Shapes of ensemble', name, 'do not fit') 1463 if obs_a.idl[name] != obs_b.idl[name]: 1464 raise ValueError('idl of ensemble', name, 'do not fit') 1465 1466 if obs_a.reweighted is True: 1467 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1468 if obs_b.reweighted is True: 1469 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1470 1471 new_samples = [] 1472 new_idl = [] 1473 for name in sorted(obs_a.names): 1474 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1475 new_idl.append(obs_a.idl[name]) 1476 1477 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1478 o.reweighted = obs_a.reweighted or obs_b.reweighted 1479 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Only works if a single ensemble is present in the Obs. Currently only works if ensemble content is identical (this is not strictly necessary).
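A sketch for two primary observables with identical ensemble content (toy data):

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(9)
>>> obs_a = pe.Obs([rng.normal(1.0, 0.1, 500)], ['ens1'])
>>> obs_b = pe.Obs([rng.normal(2.0, 0.1, 500)], ['ens1'])
>>> ab = pe.correlate(obs_a, obs_b)  # samplewise product, still a primary Obs
>>> ab.gamma_method()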
1482def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1483 r'''Calculates the error covariance matrix of a set of observables. 1484 1485 WARNING: This function should be used with care, especially for observables with support on multiple 1486 ensembles with differing autocorrelations. See the notes below for details. 1487 1488 The gamma method has to be applied first to all observables. 1489 1490 Parameters 1491 ---------- 1492 obs : list or numpy.ndarray 1493 List or one dimensional array of Obs 1494 visualize : bool 1495 If True plots the corresponding normalized correlation matrix (default False). 1496 correlation : bool 1497 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1498 smooth : None or int 1499 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1500 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1501 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1502 small ones. 1503 1504 Notes 1505 ----- 1506 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1507 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1508 in the absence of autocorrelation. 1509 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1510 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1511 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1512 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1513 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1514 ''' 1515 1516 length = len(obs) 1517 1518 max_samples = np.max([o.N for o in obs]) 1519 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1520 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). 
This will result in a rank deficient matrix.", RuntimeWarning) 1521 1522 cov = np.zeros((length, length)) 1523 for i in range(length): 1524 for j in range(i, length): 1525 cov[i, j] = _covariance_element(obs[i], obs[j]) 1526 cov = cov + cov.T - np.diag(np.diag(cov)) 1527 1528 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1529 1530 if isinstance(smooth, int): 1531 corr = _smooth_eigenvalues(corr, smooth) 1532 1533 if visualize: 1534 plt.matshow(corr, vmin=-1, vmax=1) 1535 plt.set_cmap('RdBu') 1536 plt.colorbar() 1537 plt.draw() 1538 1539 if correlation is True: 1540 return corr 1541 1542 errors = [o.dvalue for o in obs] 1543 cov = np.diag(errors) @ corr @ np.diag(errors) 1544 1545 eigenvalues = np.linalg.eigh(cov)[0] 1546 if not np.all(eigenvalues >= 0): 1547 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1548 1549 return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables
$$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$
in the absence of autocorrelation.
The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite
$$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\Big|\sum_{i}v_i\delta_i^s\Big|^2\geq 0\,,$$
for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags.
For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements,
$$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}\,.$$
This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
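A sketch for two correlated observables on the same ensemble (toy data in which shared fluctuations induce the correlation); the gamma method has to be applied first:

>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(10)
>>> common = rng.normal(0.0, 1.0, 1000)
>>> o1 = pe.Obs([common + rng.normal(0.0, 0.5, 1000)], ['ens1'])
>>> o2 = pe.Obs([common + rng.normal(0.0, 0.5, 1000)], ['ens1'])
>>> o1.gamma_method()
>>> o2.gamma_method()
>>> cov = pe.covariance([o1, o2])                     # 2x2 error covariance matrix
>>> corr = pe.covariance([o1, o2], correlation=True)  # normalized correlation matrix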
1552def invert_corr_cov_cholesky(corr, inverrdiag): 1553 """Constructs a lower triangular matrix `chol` via the Cholesky decomposition of the correlation matrix `corr` 1554 and then returns the inverse covariance matrix `chol_inv` as a lower triangular matrix by solving `chol * x = inverrdiag`. 1555 1556 Parameters 1557 ---------- 1558 corr : np.ndarray 1559 correlation matrix 1560 inverrdiag : np.ndarray 1561 diagonal matrix, the entries are the inverse errors of the data points considered 1562 """ 1563 1564 condn = np.linalg.cond(corr) 1565 if condn > 0.1 / np.finfo(float).eps: 1566 raise ValueError(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})") 1567 if condn > 1e13: 1568 warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning) 1569 chol = np.linalg.cholesky(corr) 1570 chol_inv = scipy.linalg.solve_triangular(chol, inverrdiag, lower=True) 1571 1572 return chol_inv
Constructs a lower triangular matrix chol via the Cholesky decomposition of the correlation matrix corr and then returns the inverse covariance matrix chol_inv as a lower triangular matrix by solving chol * x = inverrdiag.
Parameters
- corr (np.ndarray): correlation matrix
- inverrdiag (np.ndarray): diagonal matrix, the entries are the inverse errors of the data points considered
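A minimal numerical sketch (made-up correlation matrix and errors); by construction chol_inv.T @ chol_inv reproduces the inverse of the covariance matrix diag(err) @ corr @ diag(err):

>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1.0, 0.3], [0.3, 1.0]])
>>> err = np.array([0.1, 0.2])
>>> inverrdiag = np.diag(1.0 / err)  # inverse errors on the diagonal
>>> chol_inv = pe.obs.invert_corr_cov_cholesky(corr, inverrdiag)
>>> np.allclose(chol_inv.T @ chol_inv, np.linalg.inv(np.diag(err) @ corr @ np.diag(err)))
True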
1575def sort_corr(corr, kl, yd): 1576 """ Reorders a correlation matrix to match the alphabetical order of its underlying y data. 1577 1578 The ordering of the input correlation matrix `corr` is given by the list of keys `kl`. 1579 The input dictionary `yd` (with the same keys `kl`) must contain the corresponding y data 1580 that the correlation matrix is based on. 1581 This function sorts the list of keys `kl` alphabetically and sorts the matrix `corr` 1582 according to this alphabetical order such that the sorted matrix `corr_sorted` corresponds 1583 to the y data `yd` when arranged in an alphabetical order by its keys. 1584 1585 Parameters 1586 ---------- 1587 corr : np.ndarray 1588 A square correlation matrix constructed using the order of the y data specified by `kl`. 1589 The dimensions of `corr` should match the total number of y data points in `yd` combined. 1590 kl : list of str 1591 A list of keys that denotes the order in which the y data from `yd` was used to build the 1592 input correlation matrix `corr`. 1593 yd : dict of list 1594 A dictionary where each key corresponds to a unique identifier, and its value is a list of 1595 y data points. The total number of y data points across all keys must match the dimensions 1596 of `corr`. The lists in the dictionary can be lists of Obs. 1597 1598 Returns 1599 ------- 1600 np.ndarray 1601 A new, sorted correlation matrix that corresponds to the y data from `yd` when arranged alphabetically by its keys. 1602 1603 Example 1604 ------- 1605 >>> import numpy as np 1606 >>> import pyerrors as pe 1607 >>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]]) 1608 >>> kl = ['b', 'a'] 1609 >>> yd = {'a': [1, 2], 'b': [3]} 1610 >>> sorted_corr = pe.obs.sort_corr(corr, kl, yd) 1611 >>> print(sorted_corr) 1612 array([[1. , 0.3, 0.4], 1613 [0.3, 1. , 0.2], 1614 [0.4, 0.2, 1. ]]) 1615 1616 """ 1617 kl_sorted = sorted(kl) 1618 1619 posd = {} 1620 ofs = 0 1621 for ki, k in enumerate(kl): 1622 posd[k] = [i + ofs for i in range(len(yd[k]))] 1623 ofs += len(posd[k]) 1624 1625 mapping = [] 1626 for k in kl_sorted: 1627 for i in range(len(yd[k])): 1628 mapping.append(posd[k][i]) 1629 1630 corr_sorted = np.zeros_like(corr) 1631 for i in range(corr.shape[0]): 1632 for j in range(corr.shape[0]): 1633 corr_sorted[i][j] = corr[mapping[i]][mapping[j]] 1634 1635 return corr_sorted
Reorders a correlation matrix to match the alphabetical order of its underlying y data.
The ordering of the input correlation matrix corr is given by the list of keys kl. The input dictionary yd (with the same keys kl) must contain the corresponding y data that the correlation matrix is based on. This function sorts the list of keys kl alphabetically and sorts the matrix corr according to this alphabetical order such that the sorted matrix corr_sorted corresponds to the y data yd when arranged in an alphabetical order by its keys.
Parameters
- corr (np.ndarray): A square correlation matrix constructed using the order of the y data specified by kl. The dimensions of corr should match the total number of y data points in yd combined.
- kl (list of str): A list of keys that denotes the order in which the y data from yd was used to build the input correlation matrix corr.
- yd (dict of list): A dictionary where each key corresponds to a unique identifier, and its value is a list of y data points. The total number of y data points across all keys must match the dimensions of corr. The lists in the dictionary can be lists of Obs.
Returns
- np.ndarray: A new, sorted correlation matrix that corresponds to the y data from yd when arranged alphabetically by its keys.
Example
>>> import numpy as np
>>> import pyerrors as pe
>>> corr = np.array([[1, 0.2, 0.3], [0.2, 1, 0.4], [0.3, 0.4, 1]])
>>> kl = ['b', 'a']
>>> yd = {'a': [1, 2], 'b': [3]}
>>> sorted_corr = pe.obs.sort_corr(corr, kl, yd)
>>> print(sorted_corr)
array([[1. , 0.3, 0.4],
[0.3, 1. , 0.2],
[0.4, 0.2, 1. ]])
1715def import_jackknife(jacks, name, idl=None): 1716 """Imports jackknife samples and returns an Obs 1717 1718 Parameters 1719 ---------- 1720 jacks : numpy.ndarray 1721 numpy array containing the mean value as zeroth entry and 1722 the N jackknife samples as first to Nth entry. 1723 name : str 1724 name of the ensemble the samples are defined on. 1725 """ 1726 length = len(jacks) - 1 1727 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1728 samples = jacks[1:] @ prj 1729 mean = np.mean(samples) 1730 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1731 new_obs._value = jacks[0] 1732 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
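A roundtrip sketch together with export_jackknife (toy data):

>>> import numpy as np
>>> import pyerrors as pe
>>> obs = pe.Obs([np.random.default_rng(11).normal(1.0, 0.1, 500)], ['ens1'])
>>> jacks = obs.export_jackknife()  # mean plus N jackknife samples
>>> rebuilt = pe.import_jackknife(jacks, 'ens1')
>>> np.isclose(rebuilt.value, obs.value)
True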
1735def import_bootstrap(boots, name, random_numbers): 1736 """Imports bootstrap samples and returns an Obs 1737 1738 Parameters 1739 ---------- 1740 boots : numpy.ndarray 1741 numpy array containing the mean value as zeroth entry and 1742 the N bootstrap samples as first to Nth entry. 1743 name : str 1744 name of the ensemble the samples are defined on. 1745 random_numbers : np.ndarray 1746 Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, 1747 where samples is the number of bootstrap samples and length is the length of the original Monte Carlo 1748 chain to be reconstructed. 1749 """ 1750 samples, length = random_numbers.shape 1751 if samples != len(boots) - 1: 1752 raise ValueError("Random numbers do not have the correct shape.") 1753 1754 if samples < length: 1755 raise ValueError("Obs can't be reconstructed if there are fewer bootstrap samples than Monte Carlo data points.") 1756 1757 proj = np.vstack([np.bincount(o, minlength=length) for o in random_numbers]) / length 1758 1759 samples = scipy.linalg.lstsq(proj, boots[1:])[0] 1760 ret = Obs([samples], [name]) 1761 ret._value = boots[0] 1762 return ret
Imports bootstrap samples and returns an Obs
Parameters
- boots (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N bootstrap samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
- random_numbers (np.ndarray): Array of shape (samples, length) containing the random numbers to generate the bootstrap samples, where samples is the number of bootstrap samples and length is the length of the original Monte Carlo chain to be reconstructed.
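Example
A minimal sketch in which the bootstrap means are built by hand from a synthetic chain, so that the same index array can be reused for the reconstruction; the ensemble name 'ens1' and all numbers are illustrative:
>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(42)
>>> length = 100  # length of the original Monte Carlo chain
>>> n_boot = 250  # number of bootstrap samples, must not be smaller than length
>>> data = rng.normal(1.0, 0.1, length)
>>> random_numbers = rng.integers(0, length, size=(n_boot, length))
>>> boots = np.concatenate([[data.mean()], [data[idx].mean() for idx in random_numbers]])
>>> reconstructed = pe.obs.import_bootstrap(boots, 'ens1', random_numbers)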
def merge_obs(list_of_obs):
    """Combine all observables in list_of_obs into one new observable.
    This allows merging Obs that have been computed on multiple replica
    of the same ensemble.
    If you want to merge Obs that are based on several ensembles, please
    average them yourself.

    Parameters
    ----------
    list_of_obs : list
        list of the Obs objects to be combined

    Notes
    -----
    It is not possible to combine Obs which are based on the same replicum.
    """
    replist = [item for obs in list_of_obs for item in obs.names]
    if len(replist) != len(set(replist)):
        raise ValueError('list_of_obs contains duplicate replica: %s' % (str(replist)))
    if any([len(o.cov_names) for o in list_of_obs]):
        raise ValueError('Not possible to merge data that contains covobs!')
    new_dict = {}
    idl_dict = {}
    for o in list_of_obs:
        # Reconstruct the original samples (mean + fluctuations) per replicum.
        new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0)
                        for key in set(o.deltas) | set(o.r_values)})
        idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)})

    names = sorted(new_dict.keys())
    o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names])
    o.reweighted = np.max([oi.reweighted for oi in list_of_obs])
    return o
Combine all observables in list_of_obs into one new observable. This allows merging Obs that have been computed on multiple replica of the same ensemble. If you want to merge Obs that are based on several ensembles, please average them yourself.
Parameters
- list_of_obs (list): list of the Obs objects to be combined
Notes
It is not possible to combine Obs which are based on the same replicum.
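Example
A minimal sketch that merges two replica of the same ensemble; the replica names 'ens1|r01' and 'ens1|r02' and the synthetic data are illustrative:
>>> import numpy as np
>>> import pyerrors as pe
>>> rng = np.random.default_rng(42)
>>> obs_r1 = pe.Obs([rng.normal(1.0, 0.1, 100)], ['ens1|r01'])
>>> obs_r2 = pe.Obs([rng.normal(1.0, 0.1, 100)], ['ens1|r02'])
>>> merged = pe.obs.merge_obs([obs_r1, obs_r2])
>>> merged.gamma_method()  # the error estimate now uses both replica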
def cov_Obs(means, cov, name, grad=None):
    """Create an Obs based on mean(s) and a covariance matrix

    Parameters
    ----------
    means : list of floats or float
        N mean value(s) of the new Obs
    cov : list or array
        2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
    name : str
        identifier for the covariance matrix
    grad : list or array
        Gradient of the Covobs wrt. the means belonging to cov.
    """

    def covobs_to_obs(co):
        """Make an Obs out of a Covobs

        Parameters
        ----------
        co : Covobs
            Covobs to be embedded into the Obs
        """
        o = Obs([], [], means=[])
        o._value = co.value
        o.names.append(co.name)
        o._covobs[co.name] = co
        o._dvalue = np.sqrt(co.errsq())
        return o

    ol = []
    if isinstance(means, (float, int)):
        means = [means]

    for i in range(len(means)):
        ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad)))
    if ol[0].covobs[name].N != len(means):
        raise ValueError('You have to provide %d mean values!' % (ol[0].covobs[name].N))
    if len(ol) == 1:
        return ol[0]
    return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- means (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.
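Example
A minimal sketch; the identifiers 'Z' and 'fit_params' and all numbers are illustrative:
>>> import pyerrors as pe
>>> # A single value 1.0 with a systematic error of 0.1 (0d covariance = variance)
>>> z = pe.obs.cov_Obs(1.0, 0.1 ** 2, 'Z')
>>> # Two correlated means with a 2x2 covariance matrix
>>> p0, p1 = pe.obs.cov_Obs([1.0, 2.0], [[0.01, 0.002], [0.002, 0.04]], 'fit_params')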