pyerrors.obs
1import warnings 2import hashlib 3import pickle 4import numpy as np 5import autograd.numpy as anp # Thinly-wrapped numpy 6from autograd import jacobian 7import matplotlib.pyplot as plt 8from scipy.stats import skew, skewtest, kurtosis, kurtosistest 9import numdifftools as nd 10from itertools import groupby 11from .covobs import Covobs 12 13# Improve print output of numpy.ndarrays containing Obs objects. 14np.set_printoptions(formatter={'object': lambda x: str(x)}) 15 16 17class Obs: 18 """Class for a general observable. 19 20 Instances of Obs are the basic objects of a pyerrors error analysis. 21 They are initialized with a list which contains arrays of samples for 22 different ensembles/replica and another list of same length which contains 23 the names of the ensembles/replica. Mathematical operations can be 24 performed on instances. The result is another instance of Obs. The error of 25 an instance can be computed with the gamma_method. Also contains additional 26 methods for output and visualization of the error calculation. 27 28 Attributes 29 ---------- 30 S_global : float 31 Standard value for S (default 2.0) 32 S_dict : dict 33 Dictionary for S values. If an entry for a given ensemble 34 exists this overwrites the standard value for that ensemble. 35 tau_exp_global : float 36 Standard value for tau_exp (default 0.0) 37 tau_exp_dict : dict 38 Dictionary for tau_exp values. If an entry for a given ensemble exists 39 this overwrites the standard value for that ensemble. 40 N_sigma_global : float 41 Standard value for N_sigma (default 1.0) 42 N_sigma_dict : dict 43 Dictionary for N_sigma values. If an entry for a given ensemble exists 44 this overwrites the standard value for that ensemble. 45 """ 46 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 47 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 48 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 49 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 50 'idl', 'tag', '_covobs', '__dict__'] 51 52 S_global = 2.0 53 S_dict = {} 54 tau_exp_global = 0.0 55 tau_exp_dict = {} 56 N_sigma_global = 1.0 57 N_sigma_dict = {} 58 59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise ValueError('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise ValueError('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise ValueError('Names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise ValueError('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.idl = {} 99 if idl is not None: 100 for name, idx in sorted(zip(names, idl)): 101 if isinstance(idx, range): 102 self.idl[name] = idx 103 elif isinstance(idx, (list, np.ndarray)): 104 dc = np.unique(np.diff(idx)) 105 if np.any(dc < 0): 106 raise ValueError("Unsorted idx for idl[%s]" % (name)) 107 if len(dc) == 1: 108 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 109 else: 110 self.idl[name] = list(idx) 111 else: 112 raise TypeError('incompatible type for idl[%s].' % (name)) 113 else: 114 for name, sample in sorted(zip(names, samples)): 115 self.idl[name] = range(1, len(sample) + 1) 116 117 if kwargs.get("means") is not None: 118 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 119 self.shape[name] = len(self.idl[name]) 120 self.N += self.shape[name] 121 self.r_values[name] = mean 122 self.deltas[name] = sample 123 else: 124 for name, sample in sorted(zip(names, samples)): 125 self.shape[name] = len(self.idl[name]) 126 self.N += self.shape[name] 127 if len(sample) != self.shape[name]: 128 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 129 self.r_values[name] = np.mean(sample) 130 self.deltas[name] = sample - self.r_values[name] 131 self._value += self.shape[name] * self.r_values[name] 132 self._value /= self.N 133 134 self._dvalue = 0.0 135 self.ddvalue = 0.0 136 self.reweighted = False 137 138 self.tag = None 139 140 @property 141 def value(self): 142 return self._value 143 144 @property 145 def dvalue(self): 146 return self._dvalue 147 148 @property 149 def e_names(self): 150 return sorted(set([o.split('|')[0] for o in self.names])) 151 152 @property 153 def cov_names(self): 154 return sorted(set([o for o in self.covobs.keys()])) 155 156 @property 157 def mc_names(self): 158 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 159 160 @property 161 def e_content(self): 162 res = {} 163 for e, e_name in enumerate(self.e_names): 164 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 165 if e_name in self.names: 166 res[e_name].append(e_name) 167 return res 168 169 @property 170 def covobs(self): 171 return self._covobs 172 173 def gamma_method(self, **kwargs): 174 """Estimate the error and related properties of the Obs. 175 176 Parameters 177 ---------- 178 S : float 179 specifies a custom value for the parameter S (default 2.0). 180 If set to 0 it is assumed that the data exhibits no 181 autocorrelation. In this case the error estimates coincides 182 with the sample standard error. 183 tau_exp : float 184 positive value triggers the critical slowing down analysis 185 (default 0.0). 186 N_sigma : float 187 number of standard deviations from zero until the tail is 188 attached to the autocorrelation function (default 1). 189 fft : bool 190 determines whether the fft algorithm is used for the computation 191 of the autocorrelation function (default True) 192 """ 193 194 e_content = self.e_content 195 self.e_dvalue = {} 196 self.e_ddvalue = {} 197 self.e_tauint = {} 198 self.e_dtauint = {} 199 self.e_windowsize = {} 200 self.e_n_tauint = {} 201 self.e_n_dtauint = {} 202 e_gamma = {} 203 self.e_rho = {} 204 self.e_drho = {} 205 self._dvalue = 0 206 self.ddvalue = 0 207 208 self.S = {} 209 self.tau_exp = {} 210 self.N_sigma = {} 211 212 if kwargs.get('fft') is False: 213 fft = False 214 else: 215 fft = True 216 217 def _parse_kwarg(kwarg_name): 218 if kwarg_name in kwargs: 219 tmp = kwargs.get(kwarg_name) 220 if isinstance(tmp, (int, float)): 221 if tmp < 0: 222 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 223 for e, e_name in enumerate(self.e_names): 224 getattr(self, kwarg_name)[e_name] = tmp 225 else: 226 raise TypeError(kwarg_name + ' is not in proper format.') 227 else: 228 for e, e_name in enumerate(self.e_names): 229 if e_name in getattr(Obs, kwarg_name + '_dict'): 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 231 else: 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 233 234 _parse_kwarg('S') 235 _parse_kwarg('tau_exp') 236 _parse_kwarg('N_sigma') 237 238 for e, e_name in enumerate(self.mc_names): 239 gapsize = _determine_gap(self, e_content, e_name) 240 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 272 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 273 # Make sure no entry of tauint is smaller than 0.5 274 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 275 # hep-lat/0306017 eq. (42) 276 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 277 self.e_n_dtauint[e_name][0] = 0.0 278 279 def _compute_drho(i): 280 tmp = (self.e_rho[e_name][i + 1:w_max] 281 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 282 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 283 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 284 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 285 286 if self.tau_exp[e_name] > 0: 287 _compute_drho(1) 288 texp = self.tau_exp[e_name] 289 # Critical slowing down analysis 290 if w_max // 2 <= 1: 291 raise Exception("Need at least 8 samples for tau_exp error analysis") 292 for n in range(1, w_max // 2): 293 _compute_drho(n + 1) 294 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 295 # Bias correction hep-lat/0306017 eq. (49) included 296 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 297 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 298 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 299 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 300 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 301 self.e_windowsize[e_name] = n 302 break 303 else: 304 if self.S[e_name] == 0.0: 305 self.e_tauint[e_name] = 0.5 306 self.e_dtauint[e_name] = 0.0 307 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 308 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 309 self.e_windowsize[e_name] = 0 310 else: 311 # Standard automatic windowing procedure 312 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 313 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 314 for n in range(1, w_max): 315 if g_w[n - 1] < 0 or n >= w_max - 1: 316 _compute_drho(n) 317 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 318 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 319 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 320 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 321 self.e_windowsize[e_name] = n 322 break 323 324 self._dvalue += self.e_dvalue[e_name] ** 2 325 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 326 327 for e_name in self.cov_names: 328 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 329 self.e_ddvalue[e_name] = 0 330 self._dvalue += self.e_dvalue[e_name]**2 331 332 self._dvalue = np.sqrt(self._dvalue) 333 if self._dvalue == 0.0: 334 self.ddvalue = 0.0 335 else: 336 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 337 return 338 339 gm = gamma_method 340 341 def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize): 342 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 343 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 344 345 Parameters 346 ---------- 347 deltas : list 348 List of fluctuations 349 idx : list 350 List or range of configurations on which the deltas are defined. 351 shape : int 352 Number of configurations in idx. 353 w_max : int 354 Upper bound for the summation window. 355 fft : bool 356 determines whether the fft algorithm is used for the computation 357 of the autocorrelation function. 358 gapsize : int 359 The target distance between two configurations. If longer distances 360 are found in idx, the data is expanded. 361 """ 362 gamma = np.zeros(w_max) 363 deltas = _expand_deltas(deltas, idx, shape, gapsize) 364 new_shape = len(deltas) 365 if fft: 366 max_gamma = min(new_shape, w_max) 367 # The padding for the fft has to be even 368 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 369 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 370 else: 371 for n in range(w_max): 372 if new_shape - n >= 0: 373 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 374 375 return gamma 376 377 def details(self, ens_content=True): 378 """Output detailed properties of the Obs. 379 380 Parameters 381 ---------- 382 ens_content : bool 383 print details about the ensembles and replica if true. 384 """ 385 if self.tag is not None: 386 print("Description:", self.tag) 387 if not hasattr(self, 'e_dvalue'): 388 print('Result\t %3.8e' % (self.value)) 389 else: 390 if self.value == 0.0: 391 percentage = np.nan 392 else: 393 percentage = np.abs(self._dvalue / self.value) * 100 394 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 395 if len(self.e_names) > 1: 396 print(' Ensemble errors:') 397 e_content = self.e_content 398 for e_name in self.mc_names: 399 gap = _determine_gap(self, e_content, e_name) 400 401 if len(self.e_names) > 1: 402 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 403 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 404 tau_string += f" in units of {gap} config" 405 if gap > 1: 406 tau_string += "s" 407 if self.tau_exp[e_name] > 0: 408 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 409 else: 410 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 411 print(tau_string) 412 for e_name in self.cov_names: 413 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 414 if ens_content is True: 415 if len(self.e_names) == 1: 416 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 417 else: 418 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 419 my_string_list = [] 420 for key, value in sorted(self.e_content.items()): 421 if key not in self.covobs: 422 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 423 if len(value) == 1: 424 my_string += f': {self.shape[value[0]]} configurations' 425 if isinstance(self.idl[value[0]], range): 426 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 427 else: 428 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 429 else: 430 sublist = [] 431 for v in value: 432 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 433 my_substring += f': {self.shape[v]} configurations' 434 if isinstance(self.idl[v], range): 435 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 436 else: 437 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 438 sublist.append(my_substring) 439 440 my_string += '\n' + '\n'.join(sublist) 441 else: 442 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 443 my_string_list.append(my_string) 444 print('\n'.join(my_string_list)) 445 446 def reweight(self, weight): 447 """Reweight the obs with given rewighting factors. 448 449 Parameters 450 ---------- 451 weight : Obs 452 Reweighting factor. An Observable that has to be defined on a superset of the 453 configurations in obs[i].idl for all i. 454 all_configs : bool 455 if True, the reweighted observables are normalized by the average of 456 the reweighting factor on all configurations in weight.idl and not 457 on the configurations in obs[i].idl. Default False. 458 """ 459 return reweight(weight, [self])[0] 460 461 def is_zero_within_error(self, sigma=1): 462 """Checks whether the observable is zero within 'sigma' standard errors. 463 464 Parameters 465 ---------- 466 sigma : int 467 Number of standard errors used for the check. 468 469 Works only properly when the gamma method was run. 470 """ 471 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 472 473 def is_zero(self, atol=1e-10): 474 """Checks whether the observable is zero within a given tolerance. 475 476 Parameters 477 ---------- 478 atol : float 479 Absolute tolerance (for details see numpy documentation). 480 """ 481 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 482 483 def plot_tauint(self, save=None): 484 """Plot integrated autocorrelation time for each ensemble. 485 486 Parameters 487 ---------- 488 save : str 489 saves the figure to a file named 'save' if. 490 """ 491 if not hasattr(self, 'e_dvalue'): 492 raise Exception('Run the gamma method first.') 493 494 for e, e_name in enumerate(self.mc_names): 495 fig = plt.figure() 496 plt.xlabel(r'$W$') 497 plt.ylabel(r'$\tau_\mathrm{int}$') 498 length = int(len(self.e_n_tauint[e_name])) 499 if self.tau_exp[e_name] > 0: 500 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 501 x_help = np.arange(2 * self.tau_exp[e_name]) 502 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 503 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 504 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 505 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 506 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 507 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 508 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 509 else: 510 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 511 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 512 513 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 514 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 515 plt.legend() 516 plt.xlim(-0.5, xmax) 517 ylim = plt.ylim() 518 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 519 plt.draw() 520 if save: 521 fig.savefig(save + "_" + str(e)) 522 523 def plot_rho(self, save=None): 524 """Plot normalized autocorrelation function time for each ensemble. 525 526 Parameters 527 ---------- 528 save : str 529 saves the figure to a file named 'save' if. 530 """ 531 if not hasattr(self, 'e_dvalue'): 532 raise Exception('Run the gamma method first.') 533 for e, e_name in enumerate(self.mc_names): 534 fig = plt.figure() 535 plt.xlabel('W') 536 plt.ylabel('rho') 537 length = int(len(self.e_drho[e_name])) 538 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 539 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 540 if self.tau_exp[e_name] > 0: 541 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 542 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 543 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 544 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 545 else: 546 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 547 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 548 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 549 plt.xlim(-0.5, xmax) 550 plt.draw() 551 if save: 552 fig.savefig(save + "_" + str(e)) 553 554 def plot_rep_dist(self): 555 """Plot replica distribution for each ensemble with more than one replicum.""" 556 if not hasattr(self, 'e_dvalue'): 557 raise Exception('Run the gamma method first.') 558 for e, e_name in enumerate(self.mc_names): 559 if len(self.e_content[e_name]) == 1: 560 print('No replica distribution for a single replicum (', e_name, ')') 561 continue 562 r_length = [] 563 sub_r_mean = 0 564 for r, r_name in enumerate(self.e_content[e_name]): 565 r_length.append(len(self.deltas[r_name])) 566 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 567 e_N = np.sum(r_length) 568 sub_r_mean /= e_N 569 arr = np.zeros(len(self.e_content[e_name])) 570 for r, r_name in enumerate(self.e_content[e_name]): 571 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 572 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 573 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 574 plt.draw() 575 576 def plot_history(self, expand=True): 577 """Plot derived Monte Carlo history for each ensemble 578 579 Parameters 580 ---------- 581 expand : bool 582 show expanded history for irregular Monte Carlo chains (default: True). 583 """ 584 for e, e_name in enumerate(self.mc_names): 585 plt.figure() 586 r_length = [] 587 tmp = [] 588 tmp_expanded = [] 589 for r, r_name in enumerate(self.e_content[e_name]): 590 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 591 if expand: 592 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 593 r_length.append(len(tmp_expanded[-1])) 594 else: 595 r_length.append(len(tmp[-1])) 596 e_N = np.sum(r_length) 597 x = np.arange(e_N) 598 y_test = np.concatenate(tmp, axis=0) 599 if expand: 600 y = np.concatenate(tmp_expanded, axis=0) 601 else: 602 y = y_test 603 plt.errorbar(x, y, fmt='.', markersize=3) 604 plt.xlim(-0.5, e_N - 0.5) 605 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 606 plt.draw() 607 608 def plot_piechart(self, save=None): 609 """Plot piechart which shows the fractional contribution of each 610 ensemble to the error and returns a dictionary containing the fractions. 611 612 Parameters 613 ---------- 614 save : str 615 saves the figure to a file named 'save' if. 616 """ 617 if not hasattr(self, 'e_dvalue'): 618 raise Exception('Run the gamma method first.') 619 if np.isclose(0.0, self._dvalue, atol=1e-15): 620 raise Exception('Error is 0.0') 621 labels = self.e_names 622 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 623 fig1, ax1 = plt.subplots() 624 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 625 ax1.axis('equal') 626 plt.draw() 627 if save: 628 fig1.savefig(save) 629 630 return dict(zip(self.e_names, sizes)) 631 632 def dump(self, filename, datatype="json.gz", description="", **kwargs): 633 """Dump the Obs to a file 'name' of chosen format. 634 635 Parameters 636 ---------- 637 filename : str 638 name of the file to be saved. 639 datatype : str 640 Format of the exported file. Supported formats include 641 "json.gz" and "pickle" 642 description : str 643 Description for output file, only relevant for json.gz format. 644 path : str 645 specifies a custom path for the file (default '.') 646 """ 647 if 'path' in kwargs: 648 file_name = kwargs.get('path') + '/' + filename 649 else: 650 file_name = filename 651 652 if datatype == "json.gz": 653 from .input.json import dump_to_json 654 dump_to_json([self], file_name, description=description) 655 elif datatype == "pickle": 656 with open(file_name + '.p', 'wb') as fb: 657 pickle.dump(self, fb) 658 else: 659 raise Exception("Unknown datatype " + str(datatype)) 660 661 def export_jackknife(self): 662 """Export jackknife samples from the Obs 663 664 Returns 665 ------- 666 numpy.ndarray 667 Returns a numpy array of length N + 1 where N is the number of samples 668 for the given ensemble and replicum. The zeroth entry of the array contains 669 the mean value of the Obs, entries 1 to N contain the N jackknife samples 670 derived from the Obs. The current implementation only works for observables 671 defined on exactly one ensemble and replicum. The derived jackknife samples 672 should agree with samples from a full jackknife analysis up to O(1/N). 673 """ 674 675 if len(self.names) != 1: 676 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 677 678 name = self.names[0] 679 full_data = self.deltas[name] + self.r_values[name] 680 n = full_data.size 681 mean = self.value 682 tmp_jacks = np.zeros(n + 1) 683 tmp_jacks[0] = mean 684 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 685 return tmp_jacks 686 687 def __float__(self): 688 return float(self.value) 689 690 def __repr__(self): 691 return 'Obs[' + str(self) + ']' 692 693 def __str__(self): 694 return _format_uncertainty(self.value, self._dvalue) 695 696 def __format__(self, format_type): 697 if format_type == "": 698 significance = 2 699 else: 700 significance = int(float(format_type.replace("+", "").replace("-", ""))) 701 my_str = _format_uncertainty(self.value, self._dvalue, 702 significance=significance) 703 for char in ["+", " "]: 704 if format_type.startswith(char): 705 if my_str[0] != "-": 706 my_str = char + my_str 707 return my_str 708 709 def __hash__(self): 710 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 711 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 712 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 713 hash_tuple += tuple([o.encode() for o in self.names]) 714 m = hashlib.md5() 715 [m.update(o) for o in hash_tuple] 716 return int(m.hexdigest(), 16) & 0xFFFFFFFF 717 718 # Overload comparisons 719 def __lt__(self, other): 720 return self.value < other 721 722 def __le__(self, other): 723 return self.value <= other 724 725 def __gt__(self, other): 726 return self.value > other 727 728 def __ge__(self, other): 729 return self.value >= other 730 731 def __eq__(self, other): 732 return (self - other).is_zero() 733 734 def __ne__(self, other): 735 return not (self - other).is_zero() 736 737 # Overload math operations 738 def __add__(self, y): 739 if isinstance(y, Obs): 740 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 741 else: 742 if isinstance(y, np.ndarray): 743 return np.array([self + o for o in y]) 744 elif y.__class__.__name__ in ['Corr', 'CObs']: 745 return NotImplemented 746 else: 747 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 748 749 def __radd__(self, y): 750 return self + y 751 752 def __mul__(self, y): 753 if isinstance(y, Obs): 754 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 755 else: 756 if isinstance(y, np.ndarray): 757 return np.array([self * o for o in y]) 758 elif isinstance(y, complex): 759 return CObs(self * y.real, self * y.imag) 760 elif y.__class__.__name__ in ['Corr', 'CObs']: 761 return NotImplemented 762 else: 763 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 764 765 def __rmul__(self, y): 766 return self * y 767 768 def __sub__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([self - o for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 778 779 def __rsub__(self, y): 780 return -1 * (self - y) 781 782 def __pos__(self): 783 return self 784 785 def __neg__(self): 786 return -1 * self 787 788 def __truediv__(self, y): 789 if isinstance(y, Obs): 790 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 791 else: 792 if isinstance(y, np.ndarray): 793 return np.array([self / o for o in y]) 794 elif y.__class__.__name__ in ['Corr', 'CObs']: 795 return NotImplemented 796 else: 797 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 798 799 def __rtruediv__(self, y): 800 if isinstance(y, Obs): 801 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 802 else: 803 if isinstance(y, np.ndarray): 804 return np.array([o / self for o in y]) 805 elif y.__class__.__name__ in ['Corr', 'CObs']: 806 return NotImplemented 807 else: 808 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 809 810 def __pow__(self, y): 811 if isinstance(y, Obs): 812 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 813 else: 814 return derived_observable(lambda x: x[0] ** y, [self]) 815 816 def __rpow__(self, y): 817 if isinstance(y, Obs): 818 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 819 else: 820 return derived_observable(lambda x: y ** x[0], [self]) 821 822 def __abs__(self): 823 return derived_observable(lambda x: anp.abs(x[0]), [self]) 824 825 # Overload numpy functions 826 def sqrt(self): 827 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 828 829 def log(self): 830 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 831 832 def exp(self): 833 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 834 835 def sin(self): 836 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 837 838 def cos(self): 839 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 840 841 def tan(self): 842 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 843 844 def arcsin(self): 845 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 846 847 def arccos(self): 848 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 849 850 def arctan(self): 851 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 852 853 def sinh(self): 854 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 855 856 def cosh(self): 857 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 858 859 def tanh(self): 860 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 861 862 def arcsinh(self): 863 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 864 865 def arccosh(self): 866 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 867 868 def arctanh(self): 869 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 870 871 872class CObs: 873 """Class for a complex valued observable.""" 874 __slots__ = ['_real', '_imag', 'tag'] 875 876 def __init__(self, real, imag=0.0): 877 self._real = real 878 self._imag = imag 879 self.tag = None 880 881 @property 882 def real(self): 883 return self._real 884 885 @property 886 def imag(self): 887 return self._imag 888 889 def gamma_method(self, **kwargs): 890 """Executes the gamma_method for the real and the imaginary part.""" 891 if isinstance(self.real, Obs): 892 self.real.gamma_method(**kwargs) 893 if isinstance(self.imag, Obs): 894 self.imag.gamma_method(**kwargs) 895 896 def is_zero(self): 897 """Checks whether both real and imaginary part are zero within machine precision.""" 898 return self.real == 0.0 and self.imag == 0.0 899 900 def conjugate(self): 901 return CObs(self.real, -self.imag) 902 903 def __add__(self, other): 904 if isinstance(other, np.ndarray): 905 return other + self 906 elif hasattr(other, 'real') and hasattr(other, 'imag'): 907 return CObs(self.real + other.real, 908 self.imag + other.imag) 909 else: 910 return CObs(self.real + other, self.imag) 911 912 def __radd__(self, y): 913 return self + y 914 915 def __sub__(self, other): 916 if isinstance(other, np.ndarray): 917 return -1 * (other - self) 918 elif hasattr(other, 'real') and hasattr(other, 'imag'): 919 return CObs(self.real - other.real, self.imag - other.imag) 920 else: 921 return CObs(self.real - other, self.imag) 922 923 def __rsub__(self, other): 924 return -1 * (self - other) 925 926 def __mul__(self, other): 927 if isinstance(other, np.ndarray): 928 return other * self 929 elif hasattr(other, 'real') and hasattr(other, 'imag'): 930 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 931 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 932 [self.real, other.real, self.imag, other.imag], 933 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 934 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 935 [self.real, other.real, self.imag, other.imag], 936 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 937 elif getattr(other, 'imag', 0) != 0: 938 return CObs(self.real * other.real - self.imag * other.imag, 939 self.imag * other.real + self.real * other.imag) 940 else: 941 return CObs(self.real * other.real, self.imag * other.real) 942 else: 943 return CObs(self.real * other, self.imag * other) 944 945 def __rmul__(self, other): 946 return self * other 947 948 def __truediv__(self, other): 949 if isinstance(other, np.ndarray): 950 return 1 / (other / self) 951 elif hasattr(other, 'real') and hasattr(other, 'imag'): 952 r = other.real ** 2 + other.imag ** 2 953 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 954 else: 955 return CObs(self.real / other, self.imag / other) 956 957 def __rtruediv__(self, other): 958 r = self.real ** 2 + self.imag ** 2 959 if hasattr(other, 'real') and hasattr(other, 'imag'): 960 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 961 else: 962 return CObs(self.real * other / r, -self.imag * other / r) 963 964 def __abs__(self): 965 return np.sqrt(self.real**2 + self.imag**2) 966 967 def __pos__(self): 968 return self 969 970 def __neg__(self): 971 return -1 * self 972 973 def __eq__(self, other): 974 return self.real == other.real and self.imag == other.imag 975 976 def __str__(self): 977 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 978 979 def __repr__(self): 980 return 'CObs[' + str(self) + ']' 981 982 983def _format_uncertainty(value, dvalue, significance=2): 984 """Creates a string of a value and its error in paranthesis notation, e.g., 13.02(45)""" 985 if dvalue == 0.0 or (not np.isfinite(dvalue)): 986 return str(value) 987 if not isinstance(significance, int): 988 raise TypeError("significance needs to be an integer.") 989 if significance < 1: 990 raise ValueError("significance needs to be larger than zero.") 991 fexp = np.floor(np.log10(dvalue)) 992 if fexp < 0.0: 993 return '{:{form}}({:1.0f})'.format(value, dvalue * 10 ** (-fexp + significance - 1), form='.' + str(-int(fexp) + significance - 1) + 'f') 994 elif fexp == 0.0: 995 return f"{value:.{significance - 1}f}({dvalue:1.{significance - 1}f})" 996 else: 997 return f"{value:.{max(0, int(significance - fexp - 1))}f}({dvalue:2.{max(0, int(significance - fexp - 1))}f})" 998 999 1000def _expand_deltas(deltas, idx, shape, gapsize): 1001 """Expand deltas defined on idx to a regular range with spacing gapsize between two 1002 configurations and where holes are filled by 0. 1003 If idx is of type range, the deltas are not changed if the idx.step == gapsize. 1004 1005 Parameters 1006 ---------- 1007 deltas : list 1008 List of fluctuations 1009 idx : list 1010 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 1011 shape : int 1012 Number of configs in idx. 1013 gapsize : int 1014 The target distance between two configurations. If longer distances 1015 are found in idx, the data is expanded. 1016 """ 1017 if isinstance(idx, range): 1018 if (idx.step == gapsize): 1019 return deltas 1020 ret = np.zeros((idx[-1] - idx[0] + gapsize) // gapsize) 1021 for i in range(shape): 1022 ret[(idx[i] - idx[0]) // gapsize] = deltas[i] 1023 return ret 1024 1025 1026def _merge_idx(idl): 1027 """Returns the union of all lists in idl as range or sorted list 1028 1029 Parameters 1030 ---------- 1031 idl : list 1032 List of lists or ranges. 1033 """ 1034 1035 if _check_lists_equal(idl): 1036 return idl[0] 1037 1038 idunion = sorted(set().union(*idl)) 1039 1040 # Check whether idunion can be expressed as range 1041 idrange = range(idunion[0], idunion[-1] + 1, idunion[1] - idunion[0]) 1042 idtest = [list(idrange), idunion] 1043 if _check_lists_equal(idtest): 1044 return idrange 1045 1046 return idunion 1047 1048 1049def _intersection_idx(idl): 1050 """Returns the intersection of all lists in idl as range or sorted list 1051 1052 Parameters 1053 ---------- 1054 idl : list 1055 List of lists or ranges. 1056 """ 1057 1058 if _check_lists_equal(idl): 1059 return idl[0] 1060 1061 idinter = sorted(set.intersection(*[set(o) for o in idl])) 1062 1063 # Check whether idinter can be expressed as range 1064 try: 1065 idrange = range(idinter[0], idinter[-1] + 1, idinter[1] - idinter[0]) 1066 idtest = [list(idrange), idinter] 1067 if _check_lists_equal(idtest): 1068 return idrange 1069 except IndexError: 1070 pass 1071 1072 return idinter 1073 1074 1075def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1076 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1077 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1078 common divisor of the step sizes is used as new step size. 1079 1080 Parameters 1081 ---------- 1082 deltas : list 1083 List of fluctuations 1084 idx : list 1085 List or range of configs on which the deltas are defined. 1086 Has to be a subset of new_idx and has to be sorted in ascending order. 1087 shape : list 1088 Number of configs in idx. 1089 new_idx : list 1090 List of configs that defines the new range, has to be sorted in ascending order. 1091 """ 1092 1093 if type(idx) is range and type(new_idx) is range: 1094 if idx == new_idx: 1095 return deltas 1096 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1097 for i in range(shape): 1098 ret[idx[i] - new_idx[0]] = deltas[i] 1099 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx) 1100 1101 1102def derived_observable(func, data, array_mode=False, **kwargs): 1103 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1104 1105 Parameters 1106 ---------- 1107 func : object 1108 arbitrary function of the form func(data, **kwargs). For the 1109 automatic differentiation to work, all numpy functions have to have 1110 the autograd wrapper (use 'import autograd.numpy as anp'). 1111 data : list 1112 list of Obs, e.g. [obs1, obs2, obs3]. 1113 num_grad : bool 1114 if True, numerical derivatives are used instead of autograd 1115 (default False). To control the numerical differentiation the 1116 kwargs of numdifftools.step_generators.MaxStepGenerator 1117 can be used. 1118 man_grad : list 1119 manually supply a list or an array which contains the jacobian 1120 of func. Use cautiously, supplying the wrong derivative will 1121 not be intercepted. 1122 1123 Notes 1124 ----- 1125 For simple mathematical operations it can be practical to use anonymous 1126 functions. For the ratio of two observables one can e.g. use 1127 1128 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1129 """ 1130 1131 data = np.asarray(data) 1132 raveled_data = data.ravel() 1133 1134 # Workaround for matrix operations containing non Obs data 1135 if not all(isinstance(x, Obs) for x in raveled_data): 1136 for i in range(len(raveled_data)): 1137 if isinstance(raveled_data[i], (int, float)): 1138 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1139 1140 allcov = {} 1141 for o in raveled_data: 1142 for name in o.cov_names: 1143 if name in allcov: 1144 if not np.allclose(allcov[name], o.covobs[name].cov): 1145 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1146 else: 1147 allcov[name] = o.covobs[name].cov 1148 1149 n_obs = len(raveled_data) 1150 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1151 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1152 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1153 1154 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1155 1156 if data.ndim == 1: 1157 values = np.array([o.value for o in data]) 1158 else: 1159 values = np.vectorize(lambda x: x.value)(data) 1160 1161 new_values = func(values, **kwargs) 1162 1163 multi = int(isinstance(new_values, np.ndarray)) 1164 1165 new_r_values = {} 1166 new_idl_d = {} 1167 for name in new_sample_names: 1168 idl = [] 1169 tmp_values = np.zeros(n_obs) 1170 for i, item in enumerate(raveled_data): 1171 tmp_values[i] = item.r_values.get(name, item.value) 1172 tmp_idl = item.idl.get(name) 1173 if tmp_idl is not None: 1174 idl.append(tmp_idl) 1175 if multi > 0: 1176 tmp_values = np.array(tmp_values).reshape(data.shape) 1177 new_r_values[name] = func(tmp_values, **kwargs) 1178 new_idl_d[name] = _merge_idx(idl) 1179 1180 if 'man_grad' in kwargs: 1181 deriv = np.asarray(kwargs.get('man_grad')) 1182 if new_values.shape + data.shape != deriv.shape: 1183 raise Exception('Manual derivative does not have correct shape.') 1184 elif kwargs.get('num_grad') is True: 1185 if multi > 0: 1186 raise Exception('Multi mode currently not supported for numerical derivative') 1187 options = { 1188 'base_step': 0.1, 1189 'step_ratio': 2.5} 1190 for key in options.keys(): 1191 kwarg = kwargs.get(key) 1192 if kwarg is not None: 1193 options[key] = kwarg 1194 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1195 if tmp_df.size == 1: 1196 deriv = np.array([tmp_df.real]) 1197 else: 1198 deriv = tmp_df.real 1199 else: 1200 deriv = jacobian(func)(values, **kwargs) 1201 1202 final_result = np.zeros(new_values.shape, dtype=object) 1203 1204 if array_mode is True: 1205 1206 class _Zero_grad(): 1207 def __init__(self, N): 1208 self.grad = np.zeros((N, 1)) 1209 1210 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1211 d_extracted = {} 1212 g_extracted = {} 1213 for name in new_sample_names: 1214 d_extracted[name] = [] 1215 ens_length = len(new_idl_d[name]) 1216 for i_dat, dat in enumerate(data): 1217 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1218 for name in new_cov_names: 1219 g_extracted[name] = [] 1220 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1221 for i_dat, dat in enumerate(data): 1222 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1223 1224 for i_val, new_val in np.ndenumerate(new_values): 1225 new_deltas = {} 1226 new_grad = {} 1227 if array_mode is True: 1228 for name in new_sample_names: 1229 ens_length = d_extracted[name][0].shape[-1] 1230 new_deltas[name] = np.zeros(ens_length) 1231 for i_dat, dat in enumerate(d_extracted[name]): 1232 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1233 for name in new_cov_names: 1234 new_grad[name] = 0 1235 for i_dat, dat in enumerate(g_extracted[name]): 1236 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1237 else: 1238 for j_obs, obs in np.ndenumerate(data): 1239 for name in obs.names: 1240 if name in obs.cov_names: 1241 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1242 else: 1243 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1244 1245 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1246 1247 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1248 raise Exception('The same name has been used for deltas and covobs!') 1249 new_samples = [] 1250 new_means = [] 1251 new_idl = [] 1252 new_names_obs = [] 1253 for name in new_names: 1254 if name not in new_covobs: 1255 new_samples.append(new_deltas[name]) 1256 new_idl.append(new_idl_d[name]) 1257 new_means.append(new_r_values[name][i_val]) 1258 new_names_obs.append(name) 1259 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1260 for name in new_covobs: 1261 final_result[i_val].names.append(name) 1262 final_result[i_val]._covobs = new_covobs 1263 final_result[i_val]._value = new_val 1264 final_result[i_val].reweighted = reweighted 1265 1266 if multi == 0: 1267 final_result = final_result.item() 1268 1269 return final_result 1270 1271 1272def _reduce_deltas(deltas, idx_old, idx_new): 1273 """Extract deltas defined on idx_old on all configs of idx_new. 1274 1275 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1276 are ordered in an ascending order. 1277 1278 Parameters 1279 ---------- 1280 deltas : list 1281 List of fluctuations 1282 idx_old : list 1283 List or range of configs on which the deltas are defined 1284 idx_new : list 1285 List of configs for which we want to extract the deltas. 1286 Has to be a subset of idx_old. 1287 """ 1288 if not len(deltas) == len(idx_old): 1289 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1290 if type(idx_old) is range and type(idx_new) is range: 1291 if idx_old == idx_new: 1292 return deltas 1293 if _check_lists_equal([idx_old, idx_new]): 1294 return deltas 1295 indices = np.intersect1d(idx_old, idx_new, assume_unique=True, return_indices=True)[1] 1296 if len(indices) < len(idx_new): 1297 raise Exception('Error in _reduce_deltas: Config of idx_new not in idx_old') 1298 return np.array(deltas)[indices] 1299 1300 1301def reweight(weight, obs, **kwargs): 1302 """Reweight a list of observables. 1303 1304 Parameters 1305 ---------- 1306 weight : Obs 1307 Reweighting factor. An Observable that has to be defined on a superset of the 1308 configurations in obs[i].idl for all i. 1309 obs : list 1310 list of Obs, e.g. [obs1, obs2, obs3]. 1311 all_configs : bool 1312 if True, the reweighted observables are normalized by the average of 1313 the reweighting factor on all configurations in weight.idl and not 1314 on the configurations in obs[i].idl. Default False. 1315 """ 1316 result = [] 1317 for i in range(len(obs)): 1318 if len(obs[i].cov_names): 1319 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1320 if not set(obs[i].names).issubset(weight.names): 1321 raise Exception('Error: Ensembles do not fit') 1322 for name in obs[i].names: 1323 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1324 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1325 new_samples = [] 1326 w_deltas = {} 1327 for name in sorted(obs[i].names): 1328 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1329 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1330 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1331 1332 if kwargs.get('all_configs'): 1333 new_weight = weight 1334 else: 1335 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1336 1337 result.append(tmp_obs / new_weight) 1338 result[-1].reweighted = True 1339 1340 return result 1341 1342 1343def correlate(obs_a, obs_b): 1344 """Correlate two observables. 1345 1346 Parameters 1347 ---------- 1348 obs_a : Obs 1349 First observable 1350 obs_b : Obs 1351 Second observable 1352 1353 Notes 1354 ----- 1355 Keep in mind to only correlate primary observables which have not been reweighted 1356 yet. The reweighting has to be applied after correlating the observables. 1357 Currently only works if ensembles are identical (this is not strictly necessary). 1358 """ 1359 1360 if sorted(obs_a.names) != sorted(obs_b.names): 1361 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1362 if len(obs_a.cov_names) or len(obs_b.cov_names): 1363 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1364 for name in obs_a.names: 1365 if obs_a.shape[name] != obs_b.shape[name]: 1366 raise Exception('Shapes of ensemble', name, 'do not fit') 1367 if obs_a.idl[name] != obs_b.idl[name]: 1368 raise Exception('idl of ensemble', name, 'do not fit') 1369 1370 if obs_a.reweighted is True: 1371 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1372 if obs_b.reweighted is True: 1373 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1374 1375 new_samples = [] 1376 new_idl = [] 1377 for name in sorted(obs_a.names): 1378 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1379 new_idl.append(obs_a.idl[name]) 1380 1381 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1382 o.reweighted = obs_a.reweighted or obs_b.reweighted 1383 return o 1384 1385 1386def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1387 r'''Calculates the error covariance matrix of a set of observables. 1388 1389 WARNING: This function should be used with care, especially for observables with support on multiple 1390 ensembles with differing autocorrelations. See the notes below for details. 1391 1392 The gamma method has to be applied first to all observables. 1393 1394 Parameters 1395 ---------- 1396 obs : list or numpy.ndarray 1397 List or one dimensional array of Obs 1398 visualize : bool 1399 If True plots the corresponding normalized correlation matrix (default False). 1400 correlation : bool 1401 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1402 smooth : None or int 1403 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1404 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1405 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1406 small ones. 1407 1408 Notes 1409 ----- 1410 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1411 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1412 in the absence of autocorrelation. 1413 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1414 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1415 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1416 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1417 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1418 ''' 1419 1420 length = len(obs) 1421 1422 max_samples = np.max([o.N for o in obs]) 1423 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1424 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1425 1426 cov = np.zeros((length, length)) 1427 for i in range(length): 1428 for j in range(i, length): 1429 cov[i, j] = _covariance_element(obs[i], obs[j]) 1430 cov = cov + cov.T - np.diag(np.diag(cov)) 1431 1432 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1433 1434 if isinstance(smooth, int): 1435 corr = _smooth_eigenvalues(corr, smooth) 1436 1437 if visualize: 1438 plt.matshow(corr, vmin=-1, vmax=1) 1439 plt.set_cmap('RdBu') 1440 plt.colorbar() 1441 plt.draw() 1442 1443 if correlation is True: 1444 return corr 1445 1446 errors = [o.dvalue for o in obs] 1447 cov = np.diag(errors) @ corr @ np.diag(errors) 1448 1449 eigenvalues = np.linalg.eigh(cov)[0] 1450 if not np.all(eigenvalues >= 0): 1451 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1452 1453 return cov 1454 1455 1456def _smooth_eigenvalues(corr, E): 1457 """Eigenvalue smoothing as described in hep-lat/9412087 1458 1459 corr : np.ndarray 1460 correlation matrix 1461 E : integer 1462 Number of eigenvalues to be left substantially unchanged 1463 """ 1464 if not (2 < E < corr.shape[0] - 1): 1465 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1466 vals, vec = np.linalg.eigh(corr) 1467 lambda_min = np.mean(vals[:-E]) 1468 vals[vals < lambda_min] = lambda_min 1469 vals /= np.mean(vals) 1470 return vec @ np.diag(vals) @ vec.T 1471 1472 1473def _covariance_element(obs1, obs2): 1474 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1475 1476 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1477 deltas1 = _reduce_deltas(deltas1, idx1, new_idx) 1478 deltas2 = _reduce_deltas(deltas2, idx2, new_idx) 1479 return np.sum(deltas1 * deltas2) 1480 1481 if set(obs1.names).isdisjoint(set(obs2.names)): 1482 return 0.0 1483 1484 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1485 raise Exception('The gamma method has to be applied to both Obs first.') 1486 1487 dvalue = 0.0 1488 1489 for e_name in obs1.mc_names: 1490 1491 if e_name not in obs2.mc_names: 1492 continue 1493 1494 idl_d = {} 1495 for r_name in obs1.e_content[e_name]: 1496 if r_name not in obs2.e_content[e_name]: 1497 continue 1498 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1499 1500 gamma = 0.0 1501 1502 for r_name in obs1.e_content[e_name]: 1503 if r_name not in obs2.e_content[e_name]: 1504 continue 1505 if len(idl_d[r_name]) == 0: 1506 continue 1507 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1508 1509 if gamma == 0.0: 1510 continue 1511 1512 gamma_div = 0.0 1513 for r_name in obs1.e_content[e_name]: 1514 if r_name not in obs2.e_content[e_name]: 1515 continue 1516 if len(idl_d[r_name]) == 0: 1517 continue 1518 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1519 gamma /= gamma_div 1520 1521 dvalue += gamma 1522 1523 for e_name in obs1.cov_names: 1524 1525 if e_name not in obs2.cov_names: 1526 continue 1527 1528 dvalue += np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad)).item() 1529 1530 return dvalue 1531 1532 1533def import_jackknife(jacks, name, idl=None): 1534 """Imports jackknife samples and returns an Obs 1535 1536 Parameters 1537 ---------- 1538 jacks : numpy.ndarray 1539 numpy array containing the mean value as zeroth entry and 1540 the N jackknife samples as first to Nth entry. 1541 name : str 1542 name of the ensemble the samples are defined on. 1543 """ 1544 length = len(jacks) - 1 1545 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1546 samples = jacks[1:] @ prj 1547 mean = np.mean(samples) 1548 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1549 new_obs._value = jacks[0] 1550 return new_obs 1551 1552 1553def merge_obs(list_of_obs): 1554 """Combine all observables in list_of_obs into one new observable 1555 1556 Parameters 1557 ---------- 1558 list_of_obs : list 1559 list of the Obs object to be combined 1560 1561 Notes 1562 ----- 1563 It is not possible to combine obs which are based on the same replicum 1564 """ 1565 replist = [item for obs in list_of_obs for item in obs.names] 1566 if (len(replist) == len(set(replist))) is False: 1567 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1568 if any([len(o.cov_names) for o in list_of_obs]): 1569 raise Exception('Not possible to merge data that contains covobs!') 1570 new_dict = {} 1571 idl_dict = {} 1572 for o in list_of_obs: 1573 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1574 for key in set(o.deltas) | set(o.r_values)}) 1575 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1576 1577 names = sorted(new_dict.keys()) 1578 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1579 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1580 return o 1581 1582 1583def cov_Obs(means, cov, name, grad=None): 1584 """Create an Obs based on mean(s) and a covariance matrix 1585 1586 Parameters 1587 ---------- 1588 mean : list of floats or float 1589 N mean value(s) of the new Obs 1590 cov : list or array 1591 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1592 name : str 1593 identifier for the covariance matrix 1594 grad : list or array 1595 Gradient of the Covobs wrt. the means belonging to cov. 1596 """ 1597 1598 def covobs_to_obs(co): 1599 """Make an Obs out of a Covobs 1600 1601 Parameters 1602 ---------- 1603 co : Covobs 1604 Covobs to be embedded into the Obs 1605 """ 1606 o = Obs([], [], means=[]) 1607 o._value = co.value 1608 o.names.append(co.name) 1609 o._covobs[co.name] = co 1610 o._dvalue = np.sqrt(co.errsq()) 1611 return o 1612 1613 ol = [] 1614 if isinstance(means, (float, int)): 1615 means = [means] 1616 1617 for i in range(len(means)): 1618 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1619 if ol[0].covobs[name].N != len(means): 1620 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1621 if len(ol) == 1: 1622 return ol[0] 1623 return ol 1624 1625 1626def _determine_gap(o, e_content, e_name): 1627 gaps = [] 1628 for r_name in e_content[e_name]: 1629 if isinstance(o.idl[r_name], range): 1630 gaps.append(o.idl[r_name].step) 1631 else: 1632 gaps.append(np.min(np.diff(o.idl[r_name]))) 1633 1634 gap = min(gaps) 1635 if not np.all([gi % gap == 0 for gi in gaps]): 1636 raise Exception(f"Replica for ensemble {e_name} do not have a common spacing.", gaps) 1637 1638 return gap 1639 1640 1641def _check_lists_equal(idl): 1642 ''' 1643 Use groupby to efficiently check whether all elements of idl are identical. 1644 Returns True if all elements are equal, otherwise False. 1645 1646 Parameters 1647 ---------- 1648 idl : list of lists, ranges or np.ndarrays 1649 ''' 1650 g = groupby([np.nditer(el) if isinstance(el, np.ndarray) else el for el in idl]) 1651 if next(g, True) and not next(g, False): 1652 return True 1653 return False
18class Obs: 19 """Class for a general observable. 20 21 Instances of Obs are the basic objects of a pyerrors error analysis. 22 They are initialized with a list which contains arrays of samples for 23 different ensembles/replica and another list of same length which contains 24 the names of the ensembles/replica. Mathematical operations can be 25 performed on instances. The result is another instance of Obs. The error of 26 an instance can be computed with the gamma_method. Also contains additional 27 methods for output and visualization of the error calculation. 28 29 Attributes 30 ---------- 31 S_global : float 32 Standard value for S (default 2.0) 33 S_dict : dict 34 Dictionary for S values. If an entry for a given ensemble 35 exists this overwrites the standard value for that ensemble. 36 tau_exp_global : float 37 Standard value for tau_exp (default 0.0) 38 tau_exp_dict : dict 39 Dictionary for tau_exp values. If an entry for a given ensemble exists 40 this overwrites the standard value for that ensemble. 41 N_sigma_global : float 42 Standard value for N_sigma (default 1.0) 43 N_sigma_dict : dict 44 Dictionary for N_sigma values. If an entry for a given ensemble exists 45 this overwrites the standard value for that ensemble. 46 """ 47 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 48 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 49 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 50 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 51 'idl', 'tag', '_covobs', '__dict__'] 52 53 S_global = 2.0 54 S_dict = {} 55 tau_exp_global = 0.0 56 tau_exp_dict = {} 57 N_sigma_global = 1.0 58 N_sigma_dict = {} 59 60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise ValueError('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise ValueError('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise ValueError('Names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise ValueError('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise ValueError("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise TypeError('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None 140 141 @property 142 def value(self): 143 return self._value 144 145 @property 146 def dvalue(self): 147 return self._dvalue 148 149 @property 150 def e_names(self): 151 return sorted(set([o.split('|')[0] for o in self.names])) 152 153 @property 154 def cov_names(self): 155 return sorted(set([o for o in self.covobs.keys()])) 156 157 @property 158 def mc_names(self): 159 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 160 161 @property 162 def e_content(self): 163 res = {} 164 for e, e_name in enumerate(self.e_names): 165 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 166 if e_name in self.names: 167 res[e_name].append(e_name) 168 return res 169 170 @property 171 def covobs(self): 172 return self._covobs 173 174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 gapsize = _determine_gap(self, e_content, e_name) 241 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 273 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 274 # Make sure no entry of tauint is smaller than 0.5 275 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 276 # hep-lat/0306017 eq. (42) 277 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 278 self.e_n_dtauint[e_name][0] = 0.0 279 280 def _compute_drho(i): 281 tmp = (self.e_rho[e_name][i + 1:w_max] 282 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 283 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 284 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 285 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 286 287 if self.tau_exp[e_name] > 0: 288 _compute_drho(1) 289 texp = self.tau_exp[e_name] 290 # Critical slowing down analysis 291 if w_max // 2 <= 1: 292 raise Exception("Need at least 8 samples for tau_exp error analysis") 293 for n in range(1, w_max // 2): 294 _compute_drho(n + 1) 295 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 296 # Bias correction hep-lat/0306017 eq. (49) included 297 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 298 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 299 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 300 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 302 self.e_windowsize[e_name] = n 303 break 304 else: 305 if self.S[e_name] == 0.0: 306 self.e_tauint[e_name] = 0.5 307 self.e_dtauint[e_name] = 0.0 308 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 310 self.e_windowsize[e_name] = 0 311 else: 312 # Standard automatic windowing procedure 313 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 314 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 315 for n in range(1, w_max): 316 if g_w[n - 1] < 0 or n >= w_max - 1: 317 _compute_drho(n) 318 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 319 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 320 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 322 self.e_windowsize[e_name] = n 323 break 324 325 self._dvalue += self.e_dvalue[e_name] ** 2 326 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 327 328 for e_name in self.cov_names: 329 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 330 self.e_ddvalue[e_name] = 0 331 self._dvalue += self.e_dvalue[e_name]**2 332 333 self._dvalue = np.sqrt(self._dvalue) 334 if self._dvalue == 0.0: 335 self.ddvalue = 0.0 336 else: 337 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 338 return 339 340 gm = gamma_method 341 342 def _calc_gamma(self, deltas, idx, shape, w_max, fft, gapsize): 343 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 344 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 345 346 Parameters 347 ---------- 348 deltas : list 349 List of fluctuations 350 idx : list 351 List or range of configurations on which the deltas are defined. 352 shape : int 353 Number of configurations in idx. 354 w_max : int 355 Upper bound for the summation window. 356 fft : bool 357 determines whether the fft algorithm is used for the computation 358 of the autocorrelation function. 359 gapsize : int 360 The target distance between two configurations. If longer distances 361 are found in idx, the data is expanded. 362 """ 363 gamma = np.zeros(w_max) 364 deltas = _expand_deltas(deltas, idx, shape, gapsize) 365 new_shape = len(deltas) 366 if fft: 367 max_gamma = min(new_shape, w_max) 368 # The padding for the fft has to be even 369 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 370 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 371 else: 372 for n in range(w_max): 373 if new_shape - n >= 0: 374 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 375 376 return gamma 377 378 def details(self, ens_content=True): 379 """Output detailed properties of the Obs. 380 381 Parameters 382 ---------- 383 ens_content : bool 384 print details about the ensembles and replica if true. 385 """ 386 if self.tag is not None: 387 print("Description:", self.tag) 388 if not hasattr(self, 'e_dvalue'): 389 print('Result\t %3.8e' % (self.value)) 390 else: 391 if self.value == 0.0: 392 percentage = np.nan 393 else: 394 percentage = np.abs(self._dvalue / self.value) * 100 395 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 396 if len(self.e_names) > 1: 397 print(' Ensemble errors:') 398 e_content = self.e_content 399 for e_name in self.mc_names: 400 gap = _determine_gap(self, e_content, e_name) 401 402 if len(self.e_names) > 1: 403 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 404 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 405 tau_string += f" in units of {gap} config" 406 if gap > 1: 407 tau_string += "s" 408 if self.tau_exp[e_name] > 0: 409 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 410 else: 411 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 412 print(tau_string) 413 for e_name in self.cov_names: 414 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 415 if ens_content is True: 416 if len(self.e_names) == 1: 417 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 418 else: 419 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 420 my_string_list = [] 421 for key, value in sorted(self.e_content.items()): 422 if key not in self.covobs: 423 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 424 if len(value) == 1: 425 my_string += f': {self.shape[value[0]]} configurations' 426 if isinstance(self.idl[value[0]], range): 427 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 428 else: 429 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 430 else: 431 sublist = [] 432 for v in value: 433 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 434 my_substring += f': {self.shape[v]} configurations' 435 if isinstance(self.idl[v], range): 436 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 437 else: 438 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 439 sublist.append(my_substring) 440 441 my_string += '\n' + '\n'.join(sublist) 442 else: 443 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 444 my_string_list.append(my_string) 445 print('\n'.join(my_string_list)) 446 447 def reweight(self, weight): 448 """Reweight the obs with given rewighting factors. 449 450 Parameters 451 ---------- 452 weight : Obs 453 Reweighting factor. An Observable that has to be defined on a superset of the 454 configurations in obs[i].idl for all i. 455 all_configs : bool 456 if True, the reweighted observables are normalized by the average of 457 the reweighting factor on all configurations in weight.idl and not 458 on the configurations in obs[i].idl. Default False. 459 """ 460 return reweight(weight, [self])[0] 461 462 def is_zero_within_error(self, sigma=1): 463 """Checks whether the observable is zero within 'sigma' standard errors. 464 465 Parameters 466 ---------- 467 sigma : int 468 Number of standard errors used for the check. 469 470 Works only properly when the gamma method was run. 471 """ 472 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 473 474 def is_zero(self, atol=1e-10): 475 """Checks whether the observable is zero within a given tolerance. 476 477 Parameters 478 ---------- 479 atol : float 480 Absolute tolerance (for details see numpy documentation). 481 """ 482 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 483 484 def plot_tauint(self, save=None): 485 """Plot integrated autocorrelation time for each ensemble. 486 487 Parameters 488 ---------- 489 save : str 490 saves the figure to a file named 'save' if. 491 """ 492 if not hasattr(self, 'e_dvalue'): 493 raise Exception('Run the gamma method first.') 494 495 for e, e_name in enumerate(self.mc_names): 496 fig = plt.figure() 497 plt.xlabel(r'$W$') 498 plt.ylabel(r'$\tau_\mathrm{int}$') 499 length = int(len(self.e_n_tauint[e_name])) 500 if self.tau_exp[e_name] > 0: 501 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 502 x_help = np.arange(2 * self.tau_exp[e_name]) 503 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 504 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 505 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 506 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 507 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 508 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 509 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 510 else: 511 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 512 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 513 514 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 515 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 516 plt.legend() 517 plt.xlim(-0.5, xmax) 518 ylim = plt.ylim() 519 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 520 plt.draw() 521 if save: 522 fig.savefig(save + "_" + str(e)) 523 524 def plot_rho(self, save=None): 525 """Plot normalized autocorrelation function time for each ensemble. 526 527 Parameters 528 ---------- 529 save : str 530 saves the figure to a file named 'save' if. 531 """ 532 if not hasattr(self, 'e_dvalue'): 533 raise Exception('Run the gamma method first.') 534 for e, e_name in enumerate(self.mc_names): 535 fig = plt.figure() 536 plt.xlabel('W') 537 plt.ylabel('rho') 538 length = int(len(self.e_drho[e_name])) 539 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 540 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 541 if self.tau_exp[e_name] > 0: 542 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 543 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 544 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 545 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 546 else: 547 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 548 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 549 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 550 plt.xlim(-0.5, xmax) 551 plt.draw() 552 if save: 553 fig.savefig(save + "_" + str(e)) 554 555 def plot_rep_dist(self): 556 """Plot replica distribution for each ensemble with more than one replicum.""" 557 if not hasattr(self, 'e_dvalue'): 558 raise Exception('Run the gamma method first.') 559 for e, e_name in enumerate(self.mc_names): 560 if len(self.e_content[e_name]) == 1: 561 print('No replica distribution for a single replicum (', e_name, ')') 562 continue 563 r_length = [] 564 sub_r_mean = 0 565 for r, r_name in enumerate(self.e_content[e_name]): 566 r_length.append(len(self.deltas[r_name])) 567 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 568 e_N = np.sum(r_length) 569 sub_r_mean /= e_N 570 arr = np.zeros(len(self.e_content[e_name])) 571 for r, r_name in enumerate(self.e_content[e_name]): 572 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 573 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 574 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 575 plt.draw() 576 577 def plot_history(self, expand=True): 578 """Plot derived Monte Carlo history for each ensemble 579 580 Parameters 581 ---------- 582 expand : bool 583 show expanded history for irregular Monte Carlo chains (default: True). 584 """ 585 for e, e_name in enumerate(self.mc_names): 586 plt.figure() 587 r_length = [] 588 tmp = [] 589 tmp_expanded = [] 590 for r, r_name in enumerate(self.e_content[e_name]): 591 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 592 if expand: 593 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 594 r_length.append(len(tmp_expanded[-1])) 595 else: 596 r_length.append(len(tmp[-1])) 597 e_N = np.sum(r_length) 598 x = np.arange(e_N) 599 y_test = np.concatenate(tmp, axis=0) 600 if expand: 601 y = np.concatenate(tmp_expanded, axis=0) 602 else: 603 y = y_test 604 plt.errorbar(x, y, fmt='.', markersize=3) 605 plt.xlim(-0.5, e_N - 0.5) 606 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 607 plt.draw() 608 609 def plot_piechart(self, save=None): 610 """Plot piechart which shows the fractional contribution of each 611 ensemble to the error and returns a dictionary containing the fractions. 612 613 Parameters 614 ---------- 615 save : str 616 saves the figure to a file named 'save' if. 617 """ 618 if not hasattr(self, 'e_dvalue'): 619 raise Exception('Run the gamma method first.') 620 if np.isclose(0.0, self._dvalue, atol=1e-15): 621 raise Exception('Error is 0.0') 622 labels = self.e_names 623 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 624 fig1, ax1 = plt.subplots() 625 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 626 ax1.axis('equal') 627 plt.draw() 628 if save: 629 fig1.savefig(save) 630 631 return dict(zip(self.e_names, sizes)) 632 633 def dump(self, filename, datatype="json.gz", description="", **kwargs): 634 """Dump the Obs to a file 'name' of chosen format. 635 636 Parameters 637 ---------- 638 filename : str 639 name of the file to be saved. 640 datatype : str 641 Format of the exported file. Supported formats include 642 "json.gz" and "pickle" 643 description : str 644 Description for output file, only relevant for json.gz format. 645 path : str 646 specifies a custom path for the file (default '.') 647 """ 648 if 'path' in kwargs: 649 file_name = kwargs.get('path') + '/' + filename 650 else: 651 file_name = filename 652 653 if datatype == "json.gz": 654 from .input.json import dump_to_json 655 dump_to_json([self], file_name, description=description) 656 elif datatype == "pickle": 657 with open(file_name + '.p', 'wb') as fb: 658 pickle.dump(self, fb) 659 else: 660 raise Exception("Unknown datatype " + str(datatype)) 661 662 def export_jackknife(self): 663 """Export jackknife samples from the Obs 664 665 Returns 666 ------- 667 numpy.ndarray 668 Returns a numpy array of length N + 1 where N is the number of samples 669 for the given ensemble and replicum. The zeroth entry of the array contains 670 the mean value of the Obs, entries 1 to N contain the N jackknife samples 671 derived from the Obs. The current implementation only works for observables 672 defined on exactly one ensemble and replicum. The derived jackknife samples 673 should agree with samples from a full jackknife analysis up to O(1/N). 674 """ 675 676 if len(self.names) != 1: 677 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 678 679 name = self.names[0] 680 full_data = self.deltas[name] + self.r_values[name] 681 n = full_data.size 682 mean = self.value 683 tmp_jacks = np.zeros(n + 1) 684 tmp_jacks[0] = mean 685 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 686 return tmp_jacks 687 688 def __float__(self): 689 return float(self.value) 690 691 def __repr__(self): 692 return 'Obs[' + str(self) + ']' 693 694 def __str__(self): 695 return _format_uncertainty(self.value, self._dvalue) 696 697 def __format__(self, format_type): 698 if format_type == "": 699 significance = 2 700 else: 701 significance = int(float(format_type.replace("+", "").replace("-", ""))) 702 my_str = _format_uncertainty(self.value, self._dvalue, 703 significance=significance) 704 for char in ["+", " "]: 705 if format_type.startswith(char): 706 if my_str[0] != "-": 707 my_str = char + my_str 708 return my_str 709 710 def __hash__(self): 711 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 712 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 713 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 714 hash_tuple += tuple([o.encode() for o in self.names]) 715 m = hashlib.md5() 716 [m.update(o) for o in hash_tuple] 717 return int(m.hexdigest(), 16) & 0xFFFFFFFF 718 719 # Overload comparisons 720 def __lt__(self, other): 721 return self.value < other 722 723 def __le__(self, other): 724 return self.value <= other 725 726 def __gt__(self, other): 727 return self.value > other 728 729 def __ge__(self, other): 730 return self.value >= other 731 732 def __eq__(self, other): 733 return (self - other).is_zero() 734 735 def __ne__(self, other): 736 return not (self - other).is_zero() 737 738 # Overload math operations 739 def __add__(self, y): 740 if isinstance(y, Obs): 741 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 742 else: 743 if isinstance(y, np.ndarray): 744 return np.array([self + o for o in y]) 745 elif y.__class__.__name__ in ['Corr', 'CObs']: 746 return NotImplemented 747 else: 748 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 749 750 def __radd__(self, y): 751 return self + y 752 753 def __mul__(self, y): 754 if isinstance(y, Obs): 755 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 756 else: 757 if isinstance(y, np.ndarray): 758 return np.array([self * o for o in y]) 759 elif isinstance(y, complex): 760 return CObs(self * y.real, self * y.imag) 761 elif y.__class__.__name__ in ['Corr', 'CObs']: 762 return NotImplemented 763 else: 764 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 765 766 def __rmul__(self, y): 767 return self * y 768 769 def __sub__(self, y): 770 if isinstance(y, Obs): 771 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 772 else: 773 if isinstance(y, np.ndarray): 774 return np.array([self - o for o in y]) 775 elif y.__class__.__name__ in ['Corr', 'CObs']: 776 return NotImplemented 777 else: 778 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 779 780 def __rsub__(self, y): 781 return -1 * (self - y) 782 783 def __pos__(self): 784 return self 785 786 def __neg__(self): 787 return -1 * self 788 789 def __truediv__(self, y): 790 if isinstance(y, Obs): 791 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 792 else: 793 if isinstance(y, np.ndarray): 794 return np.array([self / o for o in y]) 795 elif y.__class__.__name__ in ['Corr', 'CObs']: 796 return NotImplemented 797 else: 798 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 799 800 def __rtruediv__(self, y): 801 if isinstance(y, Obs): 802 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 803 else: 804 if isinstance(y, np.ndarray): 805 return np.array([o / self for o in y]) 806 elif y.__class__.__name__ in ['Corr', 'CObs']: 807 return NotImplemented 808 else: 809 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 810 811 def __pow__(self, y): 812 if isinstance(y, Obs): 813 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 814 else: 815 return derived_observable(lambda x: x[0] ** y, [self]) 816 817 def __rpow__(self, y): 818 if isinstance(y, Obs): 819 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 820 else: 821 return derived_observable(lambda x: y ** x[0], [self]) 822 823 def __abs__(self): 824 return derived_observable(lambda x: anp.abs(x[0]), [self]) 825 826 # Overload numpy functions 827 def sqrt(self): 828 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 829 830 def log(self): 831 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 832 833 def exp(self): 834 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 835 836 def sin(self): 837 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 838 839 def cos(self): 840 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 841 842 def tan(self): 843 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 844 845 def arcsin(self): 846 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 847 848 def arccos(self): 849 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 850 851 def arctan(self): 852 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 853 854 def sinh(self): 855 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 856 857 def cosh(self): 858 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 859 860 def tanh(self): 861 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 862 863 def arcsinh(self): 864 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 865 866 def arccosh(self): 867 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 868 869 def arctanh(self): 870 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise ValueError('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise ValueError('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise ValueError('Names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise ValueError('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise ValueError("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise TypeError('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise ValueError('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 gapsize = _determine_gap(self, e_content, e_name) 241 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 273 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 274 # Make sure no entry of tauint is smaller than 0.5 275 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 276 # hep-lat/0306017 eq. (42) 277 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 278 self.e_n_dtauint[e_name][0] = 0.0 279 280 def _compute_drho(i): 281 tmp = (self.e_rho[e_name][i + 1:w_max] 282 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 283 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 284 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 285 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 286 287 if self.tau_exp[e_name] > 0: 288 _compute_drho(1) 289 texp = self.tau_exp[e_name] 290 # Critical slowing down analysis 291 if w_max // 2 <= 1: 292 raise Exception("Need at least 8 samples for tau_exp error analysis") 293 for n in range(1, w_max // 2): 294 _compute_drho(n + 1) 295 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 296 # Bias correction hep-lat/0306017 eq. (49) included 297 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 298 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 299 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 300 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 302 self.e_windowsize[e_name] = n 303 break 304 else: 305 if self.S[e_name] == 0.0: 306 self.e_tauint[e_name] = 0.5 307 self.e_dtauint[e_name] = 0.0 308 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 310 self.e_windowsize[e_name] = 0 311 else: 312 # Standard automatic windowing procedure 313 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 314 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 315 for n in range(1, w_max): 316 if g_w[n - 1] < 0 or n >= w_max - 1: 317 _compute_drho(n) 318 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 319 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 320 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 322 self.e_windowsize[e_name] = n 323 break 324 325 self._dvalue += self.e_dvalue[e_name] ** 2 326 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 327 328 for e_name in self.cov_names: 329 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 330 self.e_ddvalue[e_name] = 0 331 self._dvalue += self.e_dvalue[e_name]**2 332 333 self._dvalue = np.sqrt(self._dvalue) 334 if self._dvalue == 0.0: 335 self.ddvalue = 0.0 336 else: 337 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 338 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 gapsize = _determine_gap(self, e_content, e_name) 241 242 r_length = [] 243 for r_name in e_content[e_name]: 244 if isinstance(self.idl[r_name], range): 245 r_length.append(len(self.idl[r_name]) * self.idl[r_name].step // gapsize) 246 else: 247 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1) // gapsize) 248 249 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 250 w_max = max(r_length) // 2 251 e_gamma[e_name] = np.zeros(w_max) 252 self.e_rho[e_name] = np.zeros(w_max) 253 self.e_drho[e_name] = np.zeros(w_max) 254 255 for r_name in e_content[e_name]: 256 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 257 258 gamma_div = np.zeros(w_max) 259 for r_name in e_content[e_name]: 260 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft, gapsize) 261 gamma_div[gamma_div < 1] = 1.0 262 e_gamma[e_name] /= gamma_div[:w_max] 263 264 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 265 self.e_tauint[e_name] = 0.5 266 self.e_dtauint[e_name] = 0.0 267 self.e_dvalue[e_name] = 0.0 268 self.e_ddvalue[e_name] = 0.0 269 self.e_windowsize[e_name] = 0 270 continue 271 272 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 273 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 274 # Make sure no entry of tauint is smaller than 0.5 275 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 276 # hep-lat/0306017 eq. (42) 277 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 278 self.e_n_dtauint[e_name][0] = 0.0 279 280 def _compute_drho(i): 281 tmp = (self.e_rho[e_name][i + 1:w_max] 282 + np.concatenate([self.e_rho[e_name][i - 1:None if i - (w_max - 1) // 2 <= 0 else (2 * i - (2 * w_max) // 2):-1], 283 self.e_rho[e_name][1:max(1, w_max - 2 * i)]]) 284 - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i]) 285 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 286 287 if self.tau_exp[e_name] > 0: 288 _compute_drho(1) 289 texp = self.tau_exp[e_name] 290 # Critical slowing down analysis 291 if w_max // 2 <= 1: 292 raise Exception("Need at least 8 samples for tau_exp error analysis") 293 for n in range(1, w_max // 2): 294 _compute_drho(n + 1) 295 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 296 # Bias correction hep-lat/0306017 eq. (49) included 297 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 298 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 299 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 300 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 302 self.e_windowsize[e_name] = n 303 break 304 else: 305 if self.S[e_name] == 0.0: 306 self.e_tauint[e_name] = 0.5 307 self.e_dtauint[e_name] = 0.0 308 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 309 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 310 self.e_windowsize[e_name] = 0 311 else: 312 # Standard automatic windowing procedure 313 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 314 g_w = np.exp(- np.arange(1, len(tau) + 1) / tau) - tau / np.sqrt(np.arange(1, len(tau) + 1) * e_N) 315 for n in range(1, w_max): 316 if g_w[n - 1] < 0 or n >= w_max - 1: 317 _compute_drho(n) 318 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 319 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 320 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 321 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 322 self.e_windowsize[e_name] = n 323 break 324 325 self._dvalue += self.e_dvalue[e_name] ** 2 326 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 327 328 for e_name in self.cov_names: 329 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 330 self.e_ddvalue[e_name] = 0 331 self._dvalue += self.e_dvalue[e_name]**2 332 333 self._dvalue = np.sqrt(self._dvalue) 334 if self._dvalue == 0.0: 335 self.ddvalue = 0.0 336 else: 337 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 338 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
378 def details(self, ens_content=True): 379 """Output detailed properties of the Obs. 380 381 Parameters 382 ---------- 383 ens_content : bool 384 print details about the ensembles and replica if true. 385 """ 386 if self.tag is not None: 387 print("Description:", self.tag) 388 if not hasattr(self, 'e_dvalue'): 389 print('Result\t %3.8e' % (self.value)) 390 else: 391 if self.value == 0.0: 392 percentage = np.nan 393 else: 394 percentage = np.abs(self._dvalue / self.value) * 100 395 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 396 if len(self.e_names) > 1: 397 print(' Ensemble errors:') 398 e_content = self.e_content 399 for e_name in self.mc_names: 400 gap = _determine_gap(self, e_content, e_name) 401 402 if len(self.e_names) > 1: 403 print('', e_name, '\t %3.6e +/- %3.6e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 404 tau_string = " \N{GREEK SMALL LETTER TAU}_int\t " + _format_uncertainty(self.e_tauint[e_name], self.e_dtauint[e_name]) 405 tau_string += f" in units of {gap} config" 406 if gap > 1: 407 tau_string += "s" 408 if self.tau_exp[e_name] > 0: 409 tau_string = f"{tau_string: <45}" + '\t(\N{GREEK SMALL LETTER TAU}_exp=%3.2f, N_\N{GREEK SMALL LETTER SIGMA}=%1.0i)' % (self.tau_exp[e_name], self.N_sigma[e_name]) 410 else: 411 tau_string = f"{tau_string: <45}" + '\t(S=%3.2f)' % (self.S[e_name]) 412 print(tau_string) 413 for e_name in self.cov_names: 414 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 415 if ens_content is True: 416 if len(self.e_names) == 1: 417 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 418 else: 419 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 420 my_string_list = [] 421 for key, value in sorted(self.e_content.items()): 422 if key not in self.covobs: 423 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 424 if len(value) == 1: 425 my_string += f': {self.shape[value[0]]} configurations' 426 if isinstance(self.idl[value[0]], range): 427 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 428 else: 429 my_string += f' (irregular range from {self.idl[value[0]][0]} to {self.idl[value[0]][-1]})' 430 else: 431 sublist = [] 432 for v in value: 433 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 434 my_substring += f': {self.shape[v]} configurations' 435 if isinstance(self.idl[v], range): 436 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 437 else: 438 my_substring += f' (irregular range from {self.idl[v][0]} to {self.idl[v][-1]})' 439 sublist.append(my_substring) 440 441 my_string += '\n' + '\n'.join(sublist) 442 else: 443 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 444 my_string_list.append(my_string) 445 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
447 def reweight(self, weight): 448 """Reweight the obs with given rewighting factors. 449 450 Parameters 451 ---------- 452 weight : Obs 453 Reweighting factor. An Observable that has to be defined on a superset of the 454 configurations in obs[i].idl for all i. 455 all_configs : bool 456 if True, the reweighted observables are normalized by the average of 457 the reweighting factor on all configurations in weight.idl and not 458 on the configurations in obs[i].idl. Default False. 459 """ 460 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
462 def is_zero_within_error(self, sigma=1): 463 """Checks whether the observable is zero within 'sigma' standard errors. 464 465 Parameters 466 ---------- 467 sigma : int 468 Number of standard errors used for the check. 469 470 Works only properly when the gamma method was run. 471 """ 472 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
474 def is_zero(self, atol=1e-10): 475 """Checks whether the observable is zero within a given tolerance. 476 477 Parameters 478 ---------- 479 atol : float 480 Absolute tolerance (for details see numpy documentation). 481 """ 482 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
484 def plot_tauint(self, save=None): 485 """Plot integrated autocorrelation time for each ensemble. 486 487 Parameters 488 ---------- 489 save : str 490 saves the figure to a file named 'save' if. 491 """ 492 if not hasattr(self, 'e_dvalue'): 493 raise Exception('Run the gamma method first.') 494 495 for e, e_name in enumerate(self.mc_names): 496 fig = plt.figure() 497 plt.xlabel(r'$W$') 498 plt.ylabel(r'$\tau_\mathrm{int}$') 499 length = int(len(self.e_n_tauint[e_name])) 500 if self.tau_exp[e_name] > 0: 501 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 502 x_help = np.arange(2 * self.tau_exp[e_name]) 503 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 504 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 505 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 506 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 507 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 508 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 509 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 510 else: 511 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 512 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 513 514 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 515 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 516 plt.legend() 517 plt.xlim(-0.5, xmax) 518 ylim = plt.ylim() 519 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 520 plt.draw() 521 if save: 522 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
524 def plot_rho(self, save=None): 525 """Plot normalized autocorrelation function time for each ensemble. 526 527 Parameters 528 ---------- 529 save : str 530 saves the figure to a file named 'save' if. 531 """ 532 if not hasattr(self, 'e_dvalue'): 533 raise Exception('Run the gamma method first.') 534 for e, e_name in enumerate(self.mc_names): 535 fig = plt.figure() 536 plt.xlabel('W') 537 plt.ylabel('rho') 538 length = int(len(self.e_drho[e_name])) 539 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 540 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 541 if self.tau_exp[e_name] > 0: 542 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 543 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 544 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 545 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 546 else: 547 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 548 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 549 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 550 plt.xlim(-0.5, xmax) 551 plt.draw() 552 if save: 553 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
555 def plot_rep_dist(self): 556 """Plot replica distribution for each ensemble with more than one replicum.""" 557 if not hasattr(self, 'e_dvalue'): 558 raise Exception('Run the gamma method first.') 559 for e, e_name in enumerate(self.mc_names): 560 if len(self.e_content[e_name]) == 1: 561 print('No replica distribution for a single replicum (', e_name, ')') 562 continue 563 r_length = [] 564 sub_r_mean = 0 565 for r, r_name in enumerate(self.e_content[e_name]): 566 r_length.append(len(self.deltas[r_name])) 567 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 568 e_N = np.sum(r_length) 569 sub_r_mean /= e_N 570 arr = np.zeros(len(self.e_content[e_name])) 571 for r, r_name in enumerate(self.e_content[e_name]): 572 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 573 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 574 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 575 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
577 def plot_history(self, expand=True): 578 """Plot derived Monte Carlo history for each ensemble 579 580 Parameters 581 ---------- 582 expand : bool 583 show expanded history for irregular Monte Carlo chains (default: True). 584 """ 585 for e, e_name in enumerate(self.mc_names): 586 plt.figure() 587 r_length = [] 588 tmp = [] 589 tmp_expanded = [] 590 for r, r_name in enumerate(self.e_content[e_name]): 591 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 592 if expand: 593 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name], 1) + self.r_values[r_name]) 594 r_length.append(len(tmp_expanded[-1])) 595 else: 596 r_length.append(len(tmp[-1])) 597 e_N = np.sum(r_length) 598 x = np.arange(e_N) 599 y_test = np.concatenate(tmp, axis=0) 600 if expand: 601 y = np.concatenate(tmp_expanded, axis=0) 602 else: 603 y = y_test 604 plt.errorbar(x, y, fmt='.', markersize=3) 605 plt.xlim(-0.5, e_N - 0.5) 606 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 607 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
609 def plot_piechart(self, save=None): 610 """Plot piechart which shows the fractional contribution of each 611 ensemble to the error and returns a dictionary containing the fractions. 612 613 Parameters 614 ---------- 615 save : str 616 saves the figure to a file named 'save' if. 617 """ 618 if not hasattr(self, 'e_dvalue'): 619 raise Exception('Run the gamma method first.') 620 if np.isclose(0.0, self._dvalue, atol=1e-15): 621 raise Exception('Error is 0.0') 622 labels = self.e_names 623 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 624 fig1, ax1 = plt.subplots() 625 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 626 ax1.axis('equal') 627 plt.draw() 628 if save: 629 fig1.savefig(save) 630 631 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
633 def dump(self, filename, datatype="json.gz", description="", **kwargs): 634 """Dump the Obs to a file 'name' of chosen format. 635 636 Parameters 637 ---------- 638 filename : str 639 name of the file to be saved. 640 datatype : str 641 Format of the exported file. Supported formats include 642 "json.gz" and "pickle" 643 description : str 644 Description for output file, only relevant for json.gz format. 645 path : str 646 specifies a custom path for the file (default '.') 647 """ 648 if 'path' in kwargs: 649 file_name = kwargs.get('path') + '/' + filename 650 else: 651 file_name = filename 652 653 if datatype == "json.gz": 654 from .input.json import dump_to_json 655 dump_to_json([self], file_name, description=description) 656 elif datatype == "pickle": 657 with open(file_name + '.p', 'wb') as fb: 658 pickle.dump(self, fb) 659 else: 660 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
662 def export_jackknife(self): 663 """Export jackknife samples from the Obs 664 665 Returns 666 ------- 667 numpy.ndarray 668 Returns a numpy array of length N + 1 where N is the number of samples 669 for the given ensemble and replicum. The zeroth entry of the array contains 670 the mean value of the Obs, entries 1 to N contain the N jackknife samples 671 derived from the Obs. The current implementation only works for observables 672 defined on exactly one ensemble and replicum. The derived jackknife samples 673 should agree with samples from a full jackknife analysis up to O(1/N). 674 """ 675 676 if len(self.names) != 1: 677 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 678 679 name = self.names[0] 680 full_data = self.deltas[name] + self.r_values[name] 681 n = full_data.size 682 mean = self.value 683 tmp_jacks = np.zeros(n + 1) 684 tmp_jacks[0] = mean 685 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 686 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
873class CObs: 874 """Class for a complex valued observable.""" 875 __slots__ = ['_real', '_imag', 'tag'] 876 877 def __init__(self, real, imag=0.0): 878 self._real = real 879 self._imag = imag 880 self.tag = None 881 882 @property 883 def real(self): 884 return self._real 885 886 @property 887 def imag(self): 888 return self._imag 889 890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs) 896 897 def is_zero(self): 898 """Checks whether both real and imaginary part are zero within machine precision.""" 899 return self.real == 0.0 and self.imag == 0.0 900 901 def conjugate(self): 902 return CObs(self.real, -self.imag) 903 904 def __add__(self, other): 905 if isinstance(other, np.ndarray): 906 return other + self 907 elif hasattr(other, 'real') and hasattr(other, 'imag'): 908 return CObs(self.real + other.real, 909 self.imag + other.imag) 910 else: 911 return CObs(self.real + other, self.imag) 912 913 def __radd__(self, y): 914 return self + y 915 916 def __sub__(self, other): 917 if isinstance(other, np.ndarray): 918 return -1 * (other - self) 919 elif hasattr(other, 'real') and hasattr(other, 'imag'): 920 return CObs(self.real - other.real, self.imag - other.imag) 921 else: 922 return CObs(self.real - other, self.imag) 923 924 def __rsub__(self, other): 925 return -1 * (self - other) 926 927 def __mul__(self, other): 928 if isinstance(other, np.ndarray): 929 return other * self 930 elif hasattr(other, 'real') and hasattr(other, 'imag'): 931 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 932 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 933 [self.real, other.real, self.imag, other.imag], 934 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 935 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 936 [self.real, other.real, self.imag, other.imag], 937 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 938 elif getattr(other, 'imag', 0) != 0: 939 return CObs(self.real * other.real - self.imag * other.imag, 940 self.imag * other.real + self.real * other.imag) 941 else: 942 return CObs(self.real * other.real, self.imag * other.real) 943 else: 944 return CObs(self.real * other, self.imag * other) 945 946 def __rmul__(self, other): 947 return self * other 948 949 def __truediv__(self, other): 950 if isinstance(other, np.ndarray): 951 return 1 / (other / self) 952 elif hasattr(other, 'real') and hasattr(other, 'imag'): 953 r = other.real ** 2 + other.imag ** 2 954 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 955 else: 956 return CObs(self.real / other, self.imag / other) 957 958 def __rtruediv__(self, other): 959 r = self.real ** 2 + self.imag ** 2 960 if hasattr(other, 'real') and hasattr(other, 'imag'): 961 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 962 else: 963 return CObs(self.real * other / r, -self.imag * other / r) 964 965 def __abs__(self): 966 return np.sqrt(self.real**2 + self.imag**2) 967 968 def __pos__(self): 969 return self 970 971 def __neg__(self): 972 return -1 * self 973 974 def __eq__(self, other): 975 return self.real == other.real and self.imag == other.imag 976 977 def __str__(self): 978 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 979 980 def __repr__(self): 981 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
890 def gamma_method(self, **kwargs): 891 """Executes the gamma_method for the real and the imaginary part.""" 892 if isinstance(self.real, Obs): 893 self.real.gamma_method(**kwargs) 894 if isinstance(self.imag, Obs): 895 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1103def derived_observable(func, data, array_mode=False, **kwargs): 1104 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1105 1106 Parameters 1107 ---------- 1108 func : object 1109 arbitrary function of the form func(data, **kwargs). For the 1110 automatic differentiation to work, all numpy functions have to have 1111 the autograd wrapper (use 'import autograd.numpy as anp'). 1112 data : list 1113 list of Obs, e.g. [obs1, obs2, obs3]. 1114 num_grad : bool 1115 if True, numerical derivatives are used instead of autograd 1116 (default False). To control the numerical differentiation the 1117 kwargs of numdifftools.step_generators.MaxStepGenerator 1118 can be used. 1119 man_grad : list 1120 manually supply a list or an array which contains the jacobian 1121 of func. Use cautiously, supplying the wrong derivative will 1122 not be intercepted. 1123 1124 Notes 1125 ----- 1126 For simple mathematical operations it can be practical to use anonymous 1127 functions. For the ratio of two observables one can e.g. use 1128 1129 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1130 """ 1131 1132 data = np.asarray(data) 1133 raveled_data = data.ravel() 1134 1135 # Workaround for matrix operations containing non Obs data 1136 if not all(isinstance(x, Obs) for x in raveled_data): 1137 for i in range(len(raveled_data)): 1138 if isinstance(raveled_data[i], (int, float)): 1139 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1140 1141 allcov = {} 1142 for o in raveled_data: 1143 for name in o.cov_names: 1144 if name in allcov: 1145 if not np.allclose(allcov[name], o.covobs[name].cov): 1146 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1147 else: 1148 allcov[name] = o.covobs[name].cov 1149 1150 n_obs = len(raveled_data) 1151 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1152 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1153 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1154 1155 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1156 1157 if data.ndim == 1: 1158 values = np.array([o.value for o in data]) 1159 else: 1160 values = np.vectorize(lambda x: x.value)(data) 1161 1162 new_values = func(values, **kwargs) 1163 1164 multi = int(isinstance(new_values, np.ndarray)) 1165 1166 new_r_values = {} 1167 new_idl_d = {} 1168 for name in new_sample_names: 1169 idl = [] 1170 tmp_values = np.zeros(n_obs) 1171 for i, item in enumerate(raveled_data): 1172 tmp_values[i] = item.r_values.get(name, item.value) 1173 tmp_idl = item.idl.get(name) 1174 if tmp_idl is not None: 1175 idl.append(tmp_idl) 1176 if multi > 0: 1177 tmp_values = np.array(tmp_values).reshape(data.shape) 1178 new_r_values[name] = func(tmp_values, **kwargs) 1179 new_idl_d[name] = _merge_idx(idl) 1180 1181 if 'man_grad' in kwargs: 1182 deriv = np.asarray(kwargs.get('man_grad')) 1183 if new_values.shape + data.shape != deriv.shape: 1184 raise Exception('Manual derivative does not have correct shape.') 1185 elif kwargs.get('num_grad') is True: 1186 if multi > 0: 1187 raise Exception('Multi mode currently not supported for numerical derivative') 1188 options = { 1189 'base_step': 0.1, 1190 'step_ratio': 2.5} 1191 for key in options.keys(): 1192 kwarg = kwargs.get(key) 1193 if kwarg is not None: 1194 options[key] = kwarg 1195 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1196 if tmp_df.size == 1: 1197 deriv = np.array([tmp_df.real]) 1198 else: 1199 deriv = tmp_df.real 1200 else: 1201 deriv = jacobian(func)(values, **kwargs) 1202 1203 final_result = np.zeros(new_values.shape, dtype=object) 1204 1205 if array_mode is True: 1206 1207 class _Zero_grad(): 1208 def __init__(self, N): 1209 self.grad = np.zeros((N, 1)) 1210 1211 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1212 d_extracted = {} 1213 g_extracted = {} 1214 for name in new_sample_names: 1215 d_extracted[name] = [] 1216 ens_length = len(new_idl_d[name]) 1217 for i_dat, dat in enumerate(data): 1218 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1219 for name in new_cov_names: 1220 g_extracted[name] = [] 1221 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1222 for i_dat, dat in enumerate(data): 1223 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1224 1225 for i_val, new_val in np.ndenumerate(new_values): 1226 new_deltas = {} 1227 new_grad = {} 1228 if array_mode is True: 1229 for name in new_sample_names: 1230 ens_length = d_extracted[name][0].shape[-1] 1231 new_deltas[name] = np.zeros(ens_length) 1232 for i_dat, dat in enumerate(d_extracted[name]): 1233 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1234 for name in new_cov_names: 1235 new_grad[name] = 0 1236 for i_dat, dat in enumerate(g_extracted[name]): 1237 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1238 else: 1239 for j_obs, obs in np.ndenumerate(data): 1240 for name in obs.names: 1241 if name in obs.cov_names: 1242 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1243 else: 1244 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1245 1246 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1247 1248 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1249 raise Exception('The same name has been used for deltas and covobs!') 1250 new_samples = [] 1251 new_means = [] 1252 new_idl = [] 1253 new_names_obs = [] 1254 for name in new_names: 1255 if name not in new_covobs: 1256 new_samples.append(new_deltas[name]) 1257 new_idl.append(new_idl_d[name]) 1258 new_means.append(new_r_values[name][i_val]) 1259 new_names_obs.append(name) 1260 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1261 for name in new_covobs: 1262 final_result[i_val].names.append(name) 1263 final_result[i_val]._covobs = new_covobs 1264 final_result[i_val]._value = new_val 1265 final_result[i_val].reweighted = reweighted 1266 1267 if multi == 0: 1268 final_result = final_result.item() 1269 1270 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1302def reweight(weight, obs, **kwargs): 1303 """Reweight a list of observables. 1304 1305 Parameters 1306 ---------- 1307 weight : Obs 1308 Reweighting factor. An Observable that has to be defined on a superset of the 1309 configurations in obs[i].idl for all i. 1310 obs : list 1311 list of Obs, e.g. [obs1, obs2, obs3]. 1312 all_configs : bool 1313 if True, the reweighted observables are normalized by the average of 1314 the reweighting factor on all configurations in weight.idl and not 1315 on the configurations in obs[i].idl. Default False. 1316 """ 1317 result = [] 1318 for i in range(len(obs)): 1319 if len(obs[i].cov_names): 1320 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1321 if not set(obs[i].names).issubset(weight.names): 1322 raise Exception('Error: Ensembles do not fit') 1323 for name in obs[i].names: 1324 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1325 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1326 new_samples = [] 1327 w_deltas = {} 1328 for name in sorted(obs[i].names): 1329 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1330 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1331 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1332 1333 if kwargs.get('all_configs'): 1334 new_weight = weight 1335 else: 1336 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1337 1338 result.append(tmp_obs / new_weight) 1339 result[-1].reweighted = True 1340 1341 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1344def correlate(obs_a, obs_b): 1345 """Correlate two observables. 1346 1347 Parameters 1348 ---------- 1349 obs_a : Obs 1350 First observable 1351 obs_b : Obs 1352 Second observable 1353 1354 Notes 1355 ----- 1356 Keep in mind to only correlate primary observables which have not been reweighted 1357 yet. The reweighting has to be applied after correlating the observables. 1358 Currently only works if ensembles are identical (this is not strictly necessary). 1359 """ 1360 1361 if sorted(obs_a.names) != sorted(obs_b.names): 1362 raise Exception(f"Ensembles do not fit {set(sorted(obs_a.names)) ^ set(sorted(obs_b.names))}") 1363 if len(obs_a.cov_names) or len(obs_b.cov_names): 1364 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1365 for name in obs_a.names: 1366 if obs_a.shape[name] != obs_b.shape[name]: 1367 raise Exception('Shapes of ensemble', name, 'do not fit') 1368 if obs_a.idl[name] != obs_b.idl[name]: 1369 raise Exception('idl of ensemble', name, 'do not fit') 1370 1371 if obs_a.reweighted is True: 1372 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1373 if obs_b.reweighted is True: 1374 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1375 1376 new_samples = [] 1377 new_idl = [] 1378 for name in sorted(obs_a.names): 1379 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1380 new_idl.append(obs_a.idl[name]) 1381 1382 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1383 o.reweighted = obs_a.reweighted or obs_b.reweighted 1384 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1387def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1388 r'''Calculates the error covariance matrix of a set of observables. 1389 1390 WARNING: This function should be used with care, especially for observables with support on multiple 1391 ensembles with differing autocorrelations. See the notes below for details. 1392 1393 The gamma method has to be applied first to all observables. 1394 1395 Parameters 1396 ---------- 1397 obs : list or numpy.ndarray 1398 List or one dimensional array of Obs 1399 visualize : bool 1400 If True plots the corresponding normalized correlation matrix (default False). 1401 correlation : bool 1402 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1403 smooth : None or int 1404 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1405 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1406 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1407 small ones. 1408 1409 Notes 1410 ----- 1411 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1412 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1413 in the absence of autocorrelation. 1414 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1415 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1416 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1417 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1418 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1419 ''' 1420 1421 length = len(obs) 1422 1423 max_samples = np.max([o.N for o in obs]) 1424 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1425 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1426 1427 cov = np.zeros((length, length)) 1428 for i in range(length): 1429 for j in range(i, length): 1430 cov[i, j] = _covariance_element(obs[i], obs[j]) 1431 cov = cov + cov.T - np.diag(np.diag(cov)) 1432 1433 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1434 1435 if isinstance(smooth, int): 1436 corr = _smooth_eigenvalues(corr, smooth) 1437 1438 if visualize: 1439 plt.matshow(corr, vmin=-1, vmax=1) 1440 plt.set_cmap('RdBu') 1441 plt.colorbar() 1442 plt.draw() 1443 1444 if correlation is True: 1445 return corr 1446 1447 errors = [o.dvalue for o in obs] 1448 cov = np.diag(errors) @ corr @ np.diag(errors) 1449 1450 eigenvalues = np.linalg.eigh(cov)[0] 1451 if not np.all(eigenvalues >= 0): 1452 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1453 1454 return cov
Calculates the error covariance matrix of a set of observables.
WARNING: This function should be used with care, especially for observables with support on multiple ensembles with differing autocorrelations. See the notes below for details.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1534def import_jackknife(jacks, name, idl=None): 1535 """Imports jackknife samples and returns an Obs 1536 1537 Parameters 1538 ---------- 1539 jacks : numpy.ndarray 1540 numpy array containing the mean value as zeroth entry and 1541 the N jackknife samples as first to Nth entry. 1542 name : str 1543 name of the ensemble the samples are defined on. 1544 """ 1545 length = len(jacks) - 1 1546 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1547 samples = jacks[1:] @ prj 1548 mean = np.mean(samples) 1549 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1550 new_obs._value = jacks[0] 1551 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1554def merge_obs(list_of_obs): 1555 """Combine all observables in list_of_obs into one new observable 1556 1557 Parameters 1558 ---------- 1559 list_of_obs : list 1560 list of the Obs object to be combined 1561 1562 Notes 1563 ----- 1564 It is not possible to combine obs which are based on the same replicum 1565 """ 1566 replist = [item for obs in list_of_obs for item in obs.names] 1567 if (len(replist) == len(set(replist))) is False: 1568 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1569 if any([len(o.cov_names) for o in list_of_obs]): 1570 raise Exception('Not possible to merge data that contains covobs!') 1571 new_dict = {} 1572 idl_dict = {} 1573 for o in list_of_obs: 1574 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1575 for key in set(o.deltas) | set(o.r_values)}) 1576 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1577 1578 names = sorted(new_dict.keys()) 1579 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1580 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1581 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1584def cov_Obs(means, cov, name, grad=None): 1585 """Create an Obs based on mean(s) and a covariance matrix 1586 1587 Parameters 1588 ---------- 1589 mean : list of floats or float 1590 N mean value(s) of the new Obs 1591 cov : list or array 1592 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1593 name : str 1594 identifier for the covariance matrix 1595 grad : list or array 1596 Gradient of the Covobs wrt. the means belonging to cov. 1597 """ 1598 1599 def covobs_to_obs(co): 1600 """Make an Obs out of a Covobs 1601 1602 Parameters 1603 ---------- 1604 co : Covobs 1605 Covobs to be embedded into the Obs 1606 """ 1607 o = Obs([], [], means=[]) 1608 o._value = co.value 1609 o.names.append(co.name) 1610 o._covobs[co.name] = co 1611 o._dvalue = np.sqrt(co.errsq()) 1612 return o 1613 1614 ol = [] 1615 if isinstance(means, (float, int)): 1616 means = [means] 1617 1618 for i in range(len(means)): 1619 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1620 if ol[0].covobs[name].N != len(means): 1621 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1622 if len(ol) == 1: 1623 return ol[0] 1624 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.