pyerrors.obs
1import warnings 2import hashlib 3import pickle 4from math import gcd 5from functools import reduce 6import numpy as np 7import autograd.numpy as anp # Thinly-wrapped numpy 8from autograd import jacobian 9import matplotlib.pyplot as plt 10from scipy.stats import skew, skewtest, kurtosis, kurtosistest 11import numdifftools as nd 12from itertools import groupby 13from .covobs import Covobs 14 15 16class Obs: 17 """Class for a general observable. 18 19 Instances of Obs are the basic objects of a pyerrors error analysis. 20 They are initialized with a list which contains arrays of samples for 21 different ensembles/replica and another list of same length which contains 22 the names of the ensembles/replica. Mathematical operations can be 23 performed on instances. The result is another instance of Obs. The error of 24 an instance can be computed with the gamma_method. Also contains additional 25 methods for output and visualization of the error calculation. 26 27 Attributes 28 ---------- 29 S_global : float 30 Standard value for S (default 2.0) 31 S_dict : dict 32 Dictionary for S values. If an entry for a given ensemble 33 exists this overwrites the standard value for that ensemble. 34 tau_exp_global : float 35 Standard value for tau_exp (default 0.0) 36 tau_exp_dict : dict 37 Dictionary for tau_exp values. If an entry for a given ensemble exists 38 this overwrites the standard value for that ensemble. 39 N_sigma_global : float 40 Standard value for N_sigma (default 1.0) 41 N_sigma_dict : dict 42 Dictionary for N_sigma values. If an entry for a given ensemble exists 43 this overwrites the standard value for that ensemble. 44 """ 45 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 46 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 47 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 48 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 49 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 50 51 S_global = 2.0 52 S_dict = {} 53 tau_exp_global = 0.0 54 tau_exp_dict = {} 55 N_sigma_global = 1.0 56 N_sigma_dict = {} 57 filter_eps = 1e-10 58 59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None 140 141 @property 142 def value(self): 143 return self._value 144 145 @property 146 def dvalue(self): 147 return self._dvalue 148 149 @property 150 def e_names(self): 151 return sorted(set([o.split('|')[0] for o in self.names])) 152 153 @property 154 def cov_names(self): 155 return sorted(set([o for o in self.covobs.keys()])) 156 157 @property 158 def mc_names(self): 159 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 160 161 @property 162 def e_content(self): 163 res = {} 164 for e, e_name in enumerate(self.e_names): 165 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 166 if e_name in self.names: 167 res[e_name].append(e_name) 168 return res 169 170 @property 171 def covobs(self): 172 return self._covobs 173 174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 271 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 272 # Make sure no entry of tauint is smaller than 0.5 273 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 274 # hep-lat/0306017 eq. (42) 275 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 276 self.e_n_dtauint[e_name][0] = 0.0 277 278 def _compute_drho(i): 279 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 280 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 281 282 _compute_drho(1) 283 if self.tau_exp[e_name] > 0: 284 texp = self.tau_exp[e_name] 285 # Critical slowing down analysis 286 if w_max // 2 <= 1: 287 raise Exception("Need at least 8 samples for tau_exp error analysis") 288 for n in range(1, w_max // 2): 289 _compute_drho(n + 1) 290 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 291 # Bias correction hep-lat/0306017 eq. (49) included 292 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 293 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 294 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 295 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 296 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 297 self.e_windowsize[e_name] = n 298 break 299 else: 300 if self.S[e_name] == 0.0: 301 self.e_tauint[e_name] = 0.5 302 self.e_dtauint[e_name] = 0.0 303 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 305 self.e_windowsize[e_name] = 0 306 else: 307 # Standard automatic windowing procedure 308 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 309 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 310 for n in range(1, w_max): 311 if n < w_max // 2 - 2: 312 _compute_drho(n + 1) 313 if g_w[n - 1] < 0 or n >= w_max - 1: 314 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 315 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 316 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 318 self.e_windowsize[e_name] = n 319 break 320 321 self._dvalue += self.e_dvalue[e_name] ** 2 322 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 323 324 for e_name in self.cov_names: 325 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 326 self.e_ddvalue[e_name] = 0 327 self._dvalue += self.e_dvalue[e_name]**2 328 329 self._dvalue = np.sqrt(self._dvalue) 330 if self._dvalue == 0.0: 331 self.ddvalue = 0.0 332 else: 333 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 334 return 335 336 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 337 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 338 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 339 340 Parameters 341 ---------- 342 deltas : list 343 List of fluctuations 344 idx : list 345 List or range of configurations on which the deltas are defined. 346 shape : int 347 Number of configurations in idx. 348 w_max : int 349 Upper bound for the summation window. 350 fft : bool 351 determines whether the fft algorithm is used for the computation 352 of the autocorrelation function. 353 """ 354 gamma = np.zeros(w_max) 355 deltas = _expand_deltas(deltas, idx, shape) 356 new_shape = len(deltas) 357 if fft: 358 max_gamma = min(new_shape, w_max) 359 # The padding for the fft has to be even 360 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 361 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 362 else: 363 for n in range(w_max): 364 if new_shape - n >= 0: 365 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 366 367 return gamma 368 369 def details(self, ens_content=True): 370 """Output detailed properties of the Obs. 371 372 Parameters 373 ---------- 374 ens_content : bool 375 print details about the ensembles and replica if true. 376 """ 377 if self.tag is not None: 378 print("Description:", self.tag) 379 if not hasattr(self, 'e_dvalue'): 380 print('Result\t %3.8e' % (self.value)) 381 else: 382 if self.value == 0.0: 383 percentage = np.nan 384 else: 385 percentage = np.abs(self._dvalue / self.value) * 100 386 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 387 if len(self.e_names) > 1: 388 print(' Ensemble errors:') 389 for e_name in self.mc_names: 390 if len(self.e_names) > 1: 391 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 392 if self.tau_exp[e_name] > 0: 393 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 394 else: 395 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 396 for e_name in self.cov_names: 397 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 398 if ens_content is True: 399 if len(self.e_names) == 1: 400 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 401 else: 402 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 403 my_string_list = [] 404 for key, value in sorted(self.e_content.items()): 405 if key not in self.covobs: 406 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 407 if len(value) == 1: 408 my_string += f': {self.shape[value[0]]} configurations' 409 if isinstance(self.idl[value[0]], range): 410 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 411 else: 412 my_string += ' (irregular range)' 413 else: 414 sublist = [] 415 for v in value: 416 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 417 my_substring += f': {self.shape[v]} configurations' 418 if isinstance(self.idl[v], range): 419 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 420 else: 421 my_substring += ' (irregular range)' 422 sublist.append(my_substring) 423 424 my_string += '\n' + '\n'.join(sublist) 425 else: 426 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 427 my_string_list.append(my_string) 428 print('\n'.join(my_string_list)) 429 430 def reweight(self, weight): 431 """Reweight the obs with given rewighting factors. 432 433 Parameters 434 ---------- 435 weight : Obs 436 Reweighting factor. An Observable that has to be defined on a superset of the 437 configurations in obs[i].idl for all i. 438 all_configs : bool 439 if True, the reweighted observables are normalized by the average of 440 the reweighting factor on all configurations in weight.idl and not 441 on the configurations in obs[i].idl. Default False. 442 """ 443 return reweight(weight, [self])[0] 444 445 def is_zero_within_error(self, sigma=1): 446 """Checks whether the observable is zero within 'sigma' standard errors. 447 448 Parameters 449 ---------- 450 sigma : int 451 Number of standard errors used for the check. 452 453 Works only properly when the gamma method was run. 454 """ 455 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 456 457 def is_zero(self, atol=1e-10): 458 """Checks whether the observable is zero within a given tolerance. 459 460 Parameters 461 ---------- 462 atol : float 463 Absolute tolerance (for details see numpy documentation). 464 """ 465 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 466 467 def plot_tauint(self, save=None): 468 """Plot integrated autocorrelation time for each ensemble. 469 470 Parameters 471 ---------- 472 save : str 473 saves the figure to a file named 'save' if. 474 """ 475 if not hasattr(self, 'e_dvalue'): 476 raise Exception('Run the gamma method first.') 477 478 for e, e_name in enumerate(self.mc_names): 479 fig = plt.figure() 480 plt.xlabel(r'$W$') 481 plt.ylabel(r'$\tau_\mathrm{int}$') 482 length = int(len(self.e_n_tauint[e_name])) 483 if self.tau_exp[e_name] > 0: 484 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 485 x_help = np.arange(2 * self.tau_exp[e_name]) 486 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 487 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 488 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 489 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 490 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 491 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 492 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 493 else: 494 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 495 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 496 497 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 498 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 499 plt.legend() 500 plt.xlim(-0.5, xmax) 501 ylim = plt.ylim() 502 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 503 plt.draw() 504 if save: 505 fig.savefig(save + "_" + str(e)) 506 507 def plot_rho(self, save=None): 508 """Plot normalized autocorrelation function time for each ensemble. 509 510 Parameters 511 ---------- 512 save : str 513 saves the figure to a file named 'save' if. 514 """ 515 if not hasattr(self, 'e_dvalue'): 516 raise Exception('Run the gamma method first.') 517 for e, e_name in enumerate(self.mc_names): 518 fig = plt.figure() 519 plt.xlabel('W') 520 plt.ylabel('rho') 521 length = int(len(self.e_drho[e_name])) 522 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 523 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 524 if self.tau_exp[e_name] > 0: 525 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 526 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 527 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 528 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 529 else: 530 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 531 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 532 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 533 plt.xlim(-0.5, xmax) 534 plt.draw() 535 if save: 536 fig.savefig(save + "_" + str(e)) 537 538 def plot_rep_dist(self): 539 """Plot replica distribution for each ensemble with more than one replicum.""" 540 if not hasattr(self, 'e_dvalue'): 541 raise Exception('Run the gamma method first.') 542 for e, e_name in enumerate(self.mc_names): 543 if len(self.e_content[e_name]) == 1: 544 print('No replica distribution for a single replicum (', e_name, ')') 545 continue 546 r_length = [] 547 sub_r_mean = 0 548 for r, r_name in enumerate(self.e_content[e_name]): 549 r_length.append(len(self.deltas[r_name])) 550 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 551 e_N = np.sum(r_length) 552 sub_r_mean /= e_N 553 arr = np.zeros(len(self.e_content[e_name])) 554 for r, r_name in enumerate(self.e_content[e_name]): 555 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 556 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 557 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 558 plt.draw() 559 560 def plot_history(self, expand=True): 561 """Plot derived Monte Carlo history for each ensemble 562 563 Parameters 564 ---------- 565 expand : bool 566 show expanded history for irregular Monte Carlo chains (default: True). 567 """ 568 for e, e_name in enumerate(self.mc_names): 569 plt.figure() 570 r_length = [] 571 tmp = [] 572 tmp_expanded = [] 573 for r, r_name in enumerate(self.e_content[e_name]): 574 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 575 if expand: 576 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 577 r_length.append(len(tmp_expanded[-1])) 578 else: 579 r_length.append(len(tmp[-1])) 580 e_N = np.sum(r_length) 581 x = np.arange(e_N) 582 y_test = np.concatenate(tmp, axis=0) 583 if expand: 584 y = np.concatenate(tmp_expanded, axis=0) 585 else: 586 y = y_test 587 plt.errorbar(x, y, fmt='.', markersize=3) 588 plt.xlim(-0.5, e_N - 0.5) 589 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 590 plt.draw() 591 592 def plot_piechart(self, save=None): 593 """Plot piechart which shows the fractional contribution of each 594 ensemble to the error and returns a dictionary containing the fractions. 595 596 Parameters 597 ---------- 598 save : str 599 saves the figure to a file named 'save' if. 600 """ 601 if not hasattr(self, 'e_dvalue'): 602 raise Exception('Run the gamma method first.') 603 if np.isclose(0.0, self._dvalue, atol=1e-15): 604 raise Exception('Error is 0.0') 605 labels = self.e_names 606 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 607 fig1, ax1 = plt.subplots() 608 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 609 ax1.axis('equal') 610 plt.draw() 611 if save: 612 fig1.savefig(save) 613 614 return dict(zip(self.e_names, sizes)) 615 616 def dump(self, filename, datatype="json.gz", description="", **kwargs): 617 """Dump the Obs to a file 'name' of chosen format. 618 619 Parameters 620 ---------- 621 filename : str 622 name of the file to be saved. 623 datatype : str 624 Format of the exported file. Supported formats include 625 "json.gz" and "pickle" 626 description : str 627 Description for output file, only relevant for json.gz format. 628 path : str 629 specifies a custom path for the file (default '.') 630 """ 631 if 'path' in kwargs: 632 file_name = kwargs.get('path') + '/' + filename 633 else: 634 file_name = filename 635 636 if datatype == "json.gz": 637 from .input.json import dump_to_json 638 dump_to_json([self], file_name, description=description) 639 elif datatype == "pickle": 640 with open(file_name + '.p', 'wb') as fb: 641 pickle.dump(self, fb) 642 else: 643 raise Exception("Unknown datatype " + str(datatype)) 644 645 def export_jackknife(self): 646 """Export jackknife samples from the Obs 647 648 Returns 649 ------- 650 numpy.ndarray 651 Returns a numpy array of length N + 1 where N is the number of samples 652 for the given ensemble and replicum. The zeroth entry of the array contains 653 the mean value of the Obs, entries 1 to N contain the N jackknife samples 654 derived from the Obs. The current implementation only works for observables 655 defined on exactly one ensemble and replicum. The derived jackknife samples 656 should agree with samples from a full jackknife analysis up to O(1/N). 657 """ 658 659 if len(self.names) != 1: 660 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 661 662 name = self.names[0] 663 full_data = self.deltas[name] + self.r_values[name] 664 n = full_data.size 665 mean = self.value 666 tmp_jacks = np.zeros(n + 1) 667 tmp_jacks[0] = mean 668 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 669 return tmp_jacks 670 671 def __float__(self): 672 return float(self.value) 673 674 def __repr__(self): 675 return 'Obs[' + str(self) + ']' 676 677 def __str__(self): 678 if self._dvalue == 0.0: 679 return str(self.value) 680 fexp = np.floor(np.log10(self._dvalue)) 681 if fexp < 0.0: 682 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 683 elif fexp == 0.0: 684 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 685 else: 686 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 687 688 def __hash__(self): 689 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 690 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 691 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 692 hash_tuple += tuple([o.encode() for o in self.names]) 693 m = hashlib.md5() 694 [m.update(o) for o in hash_tuple] 695 return int(m.hexdigest(), 16) & 0xFFFFFFFF 696 697 # Overload comparisons 698 def __lt__(self, other): 699 return self.value < other 700 701 def __le__(self, other): 702 return self.value <= other 703 704 def __gt__(self, other): 705 return self.value > other 706 707 def __ge__(self, other): 708 return self.value >= other 709 710 def __eq__(self, other): 711 return (self - other).is_zero() 712 713 def __ne__(self, other): 714 return not (self - other).is_zero() 715 716 # Overload math operations 717 def __add__(self, y): 718 if isinstance(y, Obs): 719 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 720 else: 721 if isinstance(y, np.ndarray): 722 return np.array([self + o for o in y]) 723 elif y.__class__.__name__ in ['Corr', 'CObs']: 724 return NotImplemented 725 else: 726 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 727 728 def __radd__(self, y): 729 return self + y 730 731 def __mul__(self, y): 732 if isinstance(y, Obs): 733 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 734 else: 735 if isinstance(y, np.ndarray): 736 return np.array([self * o for o in y]) 737 elif isinstance(y, complex): 738 return CObs(self * y.real, self * y.imag) 739 elif y.__class__.__name__ in ['Corr', 'CObs']: 740 return NotImplemented 741 else: 742 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 743 744 def __rmul__(self, y): 745 return self * y 746 747 def __sub__(self, y): 748 if isinstance(y, Obs): 749 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 750 else: 751 if isinstance(y, np.ndarray): 752 return np.array([self - o for o in y]) 753 elif y.__class__.__name__ in ['Corr', 'CObs']: 754 return NotImplemented 755 else: 756 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 757 758 def __rsub__(self, y): 759 return -1 * (self - y) 760 761 def __pos__(self): 762 return self 763 764 def __neg__(self): 765 return -1 * self 766 767 def __truediv__(self, y): 768 if isinstance(y, Obs): 769 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 770 else: 771 if isinstance(y, np.ndarray): 772 return np.array([self / o for o in y]) 773 elif y.__class__.__name__ in ['Corr', 'CObs']: 774 return NotImplemented 775 else: 776 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 777 778 def __rtruediv__(self, y): 779 if isinstance(y, Obs): 780 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 781 else: 782 if isinstance(y, np.ndarray): 783 return np.array([o / self for o in y]) 784 elif y.__class__.__name__ in ['Corr', 'CObs']: 785 return NotImplemented 786 else: 787 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 788 789 def __pow__(self, y): 790 if isinstance(y, Obs): 791 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 792 else: 793 return derived_observable(lambda x: x[0] ** y, [self]) 794 795 def __rpow__(self, y): 796 if isinstance(y, Obs): 797 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 798 else: 799 return derived_observable(lambda x: y ** x[0], [self]) 800 801 def __abs__(self): 802 return derived_observable(lambda x: anp.abs(x[0]), [self]) 803 804 # Overload numpy functions 805 def sqrt(self): 806 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 807 808 def log(self): 809 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 810 811 def exp(self): 812 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 813 814 def sin(self): 815 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 816 817 def cos(self): 818 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 819 820 def tan(self): 821 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 822 823 def arcsin(self): 824 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 825 826 def arccos(self): 827 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 828 829 def arctan(self): 830 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 831 832 def sinh(self): 833 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 834 835 def cosh(self): 836 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 837 838 def tanh(self): 839 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 840 841 def arcsinh(self): 842 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 843 844 def arccosh(self): 845 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 846 847 def arctanh(self): 848 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 849 850 851class CObs: 852 """Class for a complex valued observable.""" 853 __slots__ = ['_real', '_imag', 'tag'] 854 855 def __init__(self, real, imag=0.0): 856 self._real = real 857 self._imag = imag 858 self.tag = None 859 860 @property 861 def real(self): 862 return self._real 863 864 @property 865 def imag(self): 866 return self._imag 867 868 def gamma_method(self, **kwargs): 869 """Executes the gamma_method for the real and the imaginary part.""" 870 if isinstance(self.real, Obs): 871 self.real.gamma_method(**kwargs) 872 if isinstance(self.imag, Obs): 873 self.imag.gamma_method(**kwargs) 874 875 def is_zero(self): 876 """Checks whether both real and imaginary part are zero within machine precision.""" 877 return self.real == 0.0 and self.imag == 0.0 878 879 def conjugate(self): 880 return CObs(self.real, -self.imag) 881 882 def __add__(self, other): 883 if isinstance(other, np.ndarray): 884 return other + self 885 elif hasattr(other, 'real') and hasattr(other, 'imag'): 886 return CObs(self.real + other.real, 887 self.imag + other.imag) 888 else: 889 return CObs(self.real + other, self.imag) 890 891 def __radd__(self, y): 892 return self + y 893 894 def __sub__(self, other): 895 if isinstance(other, np.ndarray): 896 return -1 * (other - self) 897 elif hasattr(other, 'real') and hasattr(other, 'imag'): 898 return CObs(self.real - other.real, self.imag - other.imag) 899 else: 900 return CObs(self.real - other, self.imag) 901 902 def __rsub__(self, other): 903 return -1 * (self - other) 904 905 def __mul__(self, other): 906 if isinstance(other, np.ndarray): 907 return other * self 908 elif hasattr(other, 'real') and hasattr(other, 'imag'): 909 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 910 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 911 [self.real, other.real, self.imag, other.imag], 912 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 913 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 914 [self.real, other.real, self.imag, other.imag], 915 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 916 elif getattr(other, 'imag', 0) != 0: 917 return CObs(self.real * other.real - self.imag * other.imag, 918 self.imag * other.real + self.real * other.imag) 919 else: 920 return CObs(self.real * other.real, self.imag * other.real) 921 else: 922 return CObs(self.real * other, self.imag * other) 923 924 def __rmul__(self, other): 925 return self * other 926 927 def __truediv__(self, other): 928 if isinstance(other, np.ndarray): 929 return 1 / (other / self) 930 elif hasattr(other, 'real') and hasattr(other, 'imag'): 931 r = other.real ** 2 + other.imag ** 2 932 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 933 else: 934 return CObs(self.real / other, self.imag / other) 935 936 def __rtruediv__(self, other): 937 r = self.real ** 2 + self.imag ** 2 938 if hasattr(other, 'real') and hasattr(other, 'imag'): 939 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 940 else: 941 return CObs(self.real * other / r, -self.imag * other / r) 942 943 def __abs__(self): 944 return np.sqrt(self.real**2 + self.imag**2) 945 946 def __pos__(self): 947 return self 948 949 def __neg__(self): 950 return -1 * self 951 952 def __eq__(self, other): 953 return self.real == other.real and self.imag == other.imag 954 955 def __str__(self): 956 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 957 958 def __repr__(self): 959 return 'CObs[' + str(self) + ']' 960 961 962def _expand_deltas(deltas, idx, shape): 963 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 964 If idx is of type range, the deltas are not changed 965 966 Parameters 967 ---------- 968 deltas : list 969 List of fluctuations 970 idx : list 971 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 972 shape : int 973 Number of configs in idx. 974 """ 975 if isinstance(idx, range): 976 return deltas 977 else: 978 ret = np.zeros(idx[-1] - idx[0] + 1) 979 for i in range(shape): 980 ret[idx[i] - idx[0]] = deltas[i] 981 return ret 982 983 984def _merge_idx(idl): 985 """Returns the union of all lists in idl as sorted list 986 987 Parameters 988 ---------- 989 idl : list 990 List of lists or ranges. 991 """ 992 993 # Use groupby to efficiently check whether all elements of idl are identical 994 try: 995 g = groupby(idl) 996 if next(g, True) and not next(g, False): 997 return idl[0] 998 except Exception: 999 pass 1000 1001 if np.all([type(idx) is range for idx in idl]): 1002 if len(set([idx[0] for idx in idl])) == 1: 1003 idstart = min([idx.start for idx in idl]) 1004 idstop = max([idx.stop for idx in idl]) 1005 idstep = min([idx.step for idx in idl]) 1006 return range(idstart, idstop, idstep) 1007 1008 return sorted(set().union(*idl)) 1009 1010 1011def _intersection_idx(idl): 1012 """Returns the intersection of all lists in idl as sorted list 1013 1014 Parameters 1015 ---------- 1016 idl : list 1017 List of lists or ranges. 1018 """ 1019 1020 def _lcm(*args): 1021 """Returns the lowest common multiple of args. 1022 1023 From python 3.9 onwards the math library contains an lcm function.""" 1024 return reduce(lambda a, b: a * b // gcd(a, b), args) 1025 1026 # Use groupby to efficiently check whether all elements of idl are identical 1027 try: 1028 g = groupby(idl) 1029 if next(g, True) and not next(g, False): 1030 return idl[0] 1031 except Exception: 1032 pass 1033 1034 if np.all([type(idx) is range for idx in idl]): 1035 if len(set([idx[0] for idx in idl])) == 1: 1036 idstart = max([idx.start for idx in idl]) 1037 idstop = min([idx.stop for idx in idl]) 1038 idstep = _lcm(*[idx.step for idx in idl]) 1039 return range(idstart, idstop, idstep) 1040 1041 return sorted(set.intersection(*[set(o) for o in idl])) 1042 1043 1044def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1045 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1046 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1047 common divisor of the step sizes is used as new step size. 1048 1049 Parameters 1050 ---------- 1051 deltas : list 1052 List of fluctuations 1053 idx : list 1054 List or range of configs on which the deltas are defined. 1055 Has to be a subset of new_idx and has to be sorted in ascending order. 1056 shape : list 1057 Number of configs in idx. 1058 new_idx : list 1059 List of configs that defines the new range, has to be sorted in ascending order. 1060 """ 1061 1062 if type(idx) is range and type(new_idx) is range: 1063 if idx == new_idx: 1064 return deltas 1065 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1066 for i in range(shape): 1067 ret[idx[i] - new_idx[0]] = deltas[i] 1068 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1069 1070 1071def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): 1072 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. 1073 If idx and new_idx are of type range, the smallest 1074 common divisor of the step sizes is used as new step size. 1075 1076 Parameters 1077 ---------- 1078 deltas : list 1079 List of fluctuations 1080 idx : list 1081 List or range of configs on which the deltas are defined. 1082 Has to be a subset of new_idx and has to be sorted in ascending order. 1083 shape : list 1084 Number of configs in idx. 1085 new_idx : list 1086 List of configs that defines the new range, has to be sorted in ascending order. 1087 """ 1088 1089 if type(idx) is range and type(new_idx) is range: 1090 if idx == new_idx: 1091 return deltas 1092 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1093 for i in range(shape): 1094 if idx[i] in new_idx: 1095 ret[idx[i] - new_idx[0]] = deltas[i] 1096 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1097 1098 1099def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1100 """Filter out all configurations with vanishing fluctuation such that they do not 1101 contribute to the error estimate anymore. Returns the new deltas and 1102 idx according to the filtering. 1103 A fluctuation is considered to be vanishing, if it is smaller than eps times 1104 the mean of the absolute values of all deltas in one list. 1105 1106 Parameters 1107 ---------- 1108 deltas : list 1109 List of fluctuations 1110 idx : list 1111 List or ranges of configs on which the deltas are defined. 1112 eps : float 1113 Prefactor that enters the filter criterion. 1114 """ 1115 new_deltas = [] 1116 new_idx = [] 1117 maxd = np.mean(np.fabs(deltas)) 1118 for i in range(len(deltas)): 1119 if abs(deltas[i]) > eps * maxd: 1120 new_deltas.append(deltas[i]) 1121 new_idx.append(idx[i]) 1122 if new_idx: 1123 return np.array(new_deltas), new_idx 1124 else: 1125 return deltas, idx 1126 1127 1128def derived_observable(func, data, array_mode=False, **kwargs): 1129 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1130 1131 Parameters 1132 ---------- 1133 func : object 1134 arbitrary function of the form func(data, **kwargs). For the 1135 automatic differentiation to work, all numpy functions have to have 1136 the autograd wrapper (use 'import autograd.numpy as anp'). 1137 data : list 1138 list of Obs, e.g. [obs1, obs2, obs3]. 1139 num_grad : bool 1140 if True, numerical derivatives are used instead of autograd 1141 (default False). To control the numerical differentiation the 1142 kwargs of numdifftools.step_generators.MaxStepGenerator 1143 can be used. 1144 man_grad : list 1145 manually supply a list or an array which contains the jacobian 1146 of func. Use cautiously, supplying the wrong derivative will 1147 not be intercepted. 1148 1149 Notes 1150 ----- 1151 For simple mathematical operations it can be practical to use anonymous 1152 functions. For the ratio of two observables one can e.g. use 1153 1154 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1155 """ 1156 1157 data = np.asarray(data) 1158 raveled_data = data.ravel() 1159 1160 # Workaround for matrix operations containing non Obs data 1161 if not all(isinstance(x, Obs) for x in raveled_data): 1162 for i in range(len(raveled_data)): 1163 if isinstance(raveled_data[i], (int, float)): 1164 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1165 1166 allcov = {} 1167 for o in raveled_data: 1168 for name in o.cov_names: 1169 if name in allcov: 1170 if not np.allclose(allcov[name], o.covobs[name].cov): 1171 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1172 else: 1173 allcov[name] = o.covobs[name].cov 1174 1175 n_obs = len(raveled_data) 1176 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1177 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1178 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1179 1180 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1181 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1182 1183 if data.ndim == 1: 1184 values = np.array([o.value for o in data]) 1185 else: 1186 values = np.vectorize(lambda x: x.value)(data) 1187 1188 new_values = func(values, **kwargs) 1189 1190 multi = int(isinstance(new_values, np.ndarray)) 1191 1192 new_r_values = {} 1193 new_idl_d = {} 1194 for name in new_sample_names: 1195 idl = [] 1196 tmp_values = np.zeros(n_obs) 1197 for i, item in enumerate(raveled_data): 1198 tmp_values[i] = item.r_values.get(name, item.value) 1199 tmp_idl = item.idl.get(name) 1200 if tmp_idl is not None: 1201 idl.append(tmp_idl) 1202 if multi > 0: 1203 tmp_values = np.array(tmp_values).reshape(data.shape) 1204 new_r_values[name] = func(tmp_values, **kwargs) 1205 new_idl_d[name] = _merge_idx(idl) 1206 if not is_merged[name]: 1207 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1208 1209 if 'man_grad' in kwargs: 1210 deriv = np.asarray(kwargs.get('man_grad')) 1211 if new_values.shape + data.shape != deriv.shape: 1212 raise Exception('Manual derivative does not have correct shape.') 1213 elif kwargs.get('num_grad') is True: 1214 if multi > 0: 1215 raise Exception('Multi mode currently not supported for numerical derivative') 1216 options = { 1217 'base_step': 0.1, 1218 'step_ratio': 2.5} 1219 for key in options.keys(): 1220 kwarg = kwargs.get(key) 1221 if kwarg is not None: 1222 options[key] = kwarg 1223 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1224 if tmp_df.size == 1: 1225 deriv = np.array([tmp_df.real]) 1226 else: 1227 deriv = tmp_df.real 1228 else: 1229 deriv = jacobian(func)(values, **kwargs) 1230 1231 final_result = np.zeros(new_values.shape, dtype=object) 1232 1233 if array_mode is True: 1234 1235 class _Zero_grad(): 1236 def __init__(self, N): 1237 self.grad = np.zeros((N, 1)) 1238 1239 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1240 d_extracted = {} 1241 g_extracted = {} 1242 for name in new_sample_names: 1243 d_extracted[name] = [] 1244 ens_length = len(new_idl_d[name]) 1245 for i_dat, dat in enumerate(data): 1246 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1247 for name in new_cov_names: 1248 g_extracted[name] = [] 1249 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1250 for i_dat, dat in enumerate(data): 1251 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1252 1253 for i_val, new_val in np.ndenumerate(new_values): 1254 new_deltas = {} 1255 new_grad = {} 1256 if array_mode is True: 1257 for name in new_sample_names: 1258 ens_length = d_extracted[name][0].shape[-1] 1259 new_deltas[name] = np.zeros(ens_length) 1260 for i_dat, dat in enumerate(d_extracted[name]): 1261 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1262 for name in new_cov_names: 1263 new_grad[name] = 0 1264 for i_dat, dat in enumerate(g_extracted[name]): 1265 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1266 else: 1267 for j_obs, obs in np.ndenumerate(data): 1268 for name in obs.names: 1269 if name in obs.cov_names: 1270 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1271 else: 1272 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1273 1274 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1275 1276 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1277 raise Exception('The same name has been used for deltas and covobs!') 1278 new_samples = [] 1279 new_means = [] 1280 new_idl = [] 1281 new_names_obs = [] 1282 for name in new_names: 1283 if name not in new_covobs: 1284 if is_merged[name]: 1285 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1286 else: 1287 filtered_deltas = new_deltas[name] 1288 filtered_idl_d = new_idl_d[name] 1289 1290 new_samples.append(filtered_deltas) 1291 new_idl.append(filtered_idl_d) 1292 new_means.append(new_r_values[name][i_val]) 1293 new_names_obs.append(name) 1294 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1295 for name in new_covobs: 1296 final_result[i_val].names.append(name) 1297 final_result[i_val]._covobs = new_covobs 1298 final_result[i_val]._value = new_val 1299 final_result[i_val].is_merged = is_merged 1300 final_result[i_val].reweighted = reweighted 1301 1302 if multi == 0: 1303 final_result = final_result.item() 1304 1305 return final_result 1306 1307 1308def _reduce_deltas(deltas, idx_old, idx_new): 1309 """Extract deltas defined on idx_old on all configs of idx_new. 1310 1311 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1312 are ordered in an ascending order. 1313 1314 Parameters 1315 ---------- 1316 deltas : list 1317 List of fluctuations 1318 idx_old : list 1319 List or range of configs on which the deltas are defined 1320 idx_new : list 1321 List of configs for which we want to extract the deltas. 1322 Has to be a subset of idx_old. 1323 """ 1324 if not len(deltas) == len(idx_old): 1325 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1326 if type(idx_old) is range and type(idx_new) is range: 1327 if idx_old == idx_new: 1328 return deltas 1329 shape = len(idx_new) 1330 ret = np.zeros(shape) 1331 oldpos = 0 1332 for i in range(shape): 1333 pos = -1 1334 for j in range(oldpos, len(idx_old)): 1335 if idx_old[j] == idx_new[i]: 1336 pos = j 1337 break 1338 if pos < 0: 1339 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1340 ret[i] = deltas[pos] 1341 oldpos = pos 1342 return np.array(ret) 1343 1344 1345def reweight(weight, obs, **kwargs): 1346 """Reweight a list of observables. 1347 1348 Parameters 1349 ---------- 1350 weight : Obs 1351 Reweighting factor. An Observable that has to be defined on a superset of the 1352 configurations in obs[i].idl for all i. 1353 obs : list 1354 list of Obs, e.g. [obs1, obs2, obs3]. 1355 all_configs : bool 1356 if True, the reweighted observables are normalized by the average of 1357 the reweighting factor on all configurations in weight.idl and not 1358 on the configurations in obs[i].idl. Default False. 1359 """ 1360 result = [] 1361 for i in range(len(obs)): 1362 if len(obs[i].cov_names): 1363 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1364 if not set(obs[i].names).issubset(weight.names): 1365 raise Exception('Error: Ensembles do not fit') 1366 for name in obs[i].names: 1367 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1368 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1369 new_samples = [] 1370 w_deltas = {} 1371 for name in sorted(obs[i].names): 1372 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1373 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1374 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1375 1376 if kwargs.get('all_configs'): 1377 new_weight = weight 1378 else: 1379 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1380 1381 result.append(tmp_obs / new_weight) 1382 result[-1].reweighted = True 1383 result[-1].is_merged = obs[i].is_merged 1384 1385 return result 1386 1387 1388def correlate(obs_a, obs_b): 1389 """Correlate two observables. 1390 1391 Parameters 1392 ---------- 1393 obs_a : Obs 1394 First observable 1395 obs_b : Obs 1396 Second observable 1397 1398 Notes 1399 ----- 1400 Keep in mind to only correlate primary observables which have not been reweighted 1401 yet. The reweighting has to be applied after correlating the observables. 1402 Currently only works if ensembles are identical (this is not strictly necessary). 1403 """ 1404 1405 if sorted(obs_a.names) != sorted(obs_b.names): 1406 raise Exception('Ensembles do not fit') 1407 if len(obs_a.cov_names) or len(obs_b.cov_names): 1408 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1409 for name in obs_a.names: 1410 if obs_a.shape[name] != obs_b.shape[name]: 1411 raise Exception('Shapes of ensemble', name, 'do not fit') 1412 if obs_a.idl[name] != obs_b.idl[name]: 1413 raise Exception('idl of ensemble', name, 'do not fit') 1414 1415 if obs_a.reweighted is True: 1416 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1417 if obs_b.reweighted is True: 1418 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1419 1420 new_samples = [] 1421 new_idl = [] 1422 for name in sorted(obs_a.names): 1423 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1424 new_idl.append(obs_a.idl[name]) 1425 1426 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1427 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1428 o.reweighted = obs_a.reweighted or obs_b.reweighted 1429 return o 1430 1431 1432def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1433 r'''Calculates the error covariance matrix of a set of observables. 1434 1435 The gamma method has to be applied first to all observables. 1436 1437 Parameters 1438 ---------- 1439 obs : list or numpy.ndarray 1440 List or one dimensional array of Obs 1441 visualize : bool 1442 If True plots the corresponding normalized correlation matrix (default False). 1443 correlation : bool 1444 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1445 smooth : None or int 1446 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1447 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1448 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1449 small ones. 1450 1451 Notes 1452 ----- 1453 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1454 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1455 in the absence of autocorrelation. 1456 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1457 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1458 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1459 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1460 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1461 ''' 1462 1463 length = len(obs) 1464 1465 max_samples = np.max([o.N for o in obs]) 1466 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1467 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1468 1469 cov = np.zeros((length, length)) 1470 for i in range(length): 1471 for j in range(i, length): 1472 cov[i, j] = _covariance_element(obs[i], obs[j]) 1473 cov = cov + cov.T - np.diag(np.diag(cov)) 1474 1475 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1476 1477 if isinstance(smooth, int): 1478 corr = _smooth_eigenvalues(corr, smooth) 1479 1480 if visualize: 1481 plt.matshow(corr, vmin=-1, vmax=1) 1482 plt.set_cmap('RdBu') 1483 plt.colorbar() 1484 plt.draw() 1485 1486 if correlation is True: 1487 return corr 1488 1489 errors = [o.dvalue for o in obs] 1490 cov = np.diag(errors) @ corr @ np.diag(errors) 1491 1492 eigenvalues = np.linalg.eigh(cov)[0] 1493 if not np.all(eigenvalues >= 0): 1494 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1495 1496 return cov 1497 1498 1499def _smooth_eigenvalues(corr, E): 1500 """Eigenvalue smoothing as described in hep-lat/9412087 1501 1502 corr : np.ndarray 1503 correlation matrix 1504 E : integer 1505 Number of eigenvalues to be left substantially unchanged 1506 """ 1507 if not (2 < E < corr.shape[0] - 1): 1508 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1509 vals, vec = np.linalg.eigh(corr) 1510 lambda_min = np.mean(vals[:-E]) 1511 vals[vals < lambda_min] = lambda_min 1512 vals /= np.mean(vals) 1513 return vec @ np.diag(vals) @ vec.T 1514 1515 1516def _covariance_element(obs1, obs2): 1517 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1518 1519 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1520 deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1521 deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1522 return np.sum(deltas1 * deltas2) 1523 1524 if set(obs1.names).isdisjoint(set(obs2.names)): 1525 return 0.0 1526 1527 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1528 raise Exception('The gamma method has to be applied to both Obs first.') 1529 1530 dvalue = 0.0 1531 1532 for e_name in obs1.mc_names: 1533 1534 if e_name not in obs2.mc_names: 1535 continue 1536 1537 idl_d = {} 1538 for r_name in obs1.e_content[e_name]: 1539 if r_name not in obs2.e_content[e_name]: 1540 continue 1541 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1542 1543 gamma = 0.0 1544 1545 for r_name in obs1.e_content[e_name]: 1546 if r_name not in obs2.e_content[e_name]: 1547 continue 1548 if len(idl_d[r_name]) == 0: 1549 continue 1550 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1551 1552 if gamma == 0.0: 1553 continue 1554 1555 gamma_div = 0.0 1556 for r_name in obs1.e_content[e_name]: 1557 if r_name not in obs2.e_content[e_name]: 1558 continue 1559 if len(idl_d[r_name]) == 0: 1560 continue 1561 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1562 gamma /= gamma_div 1563 1564 dvalue += gamma 1565 1566 for e_name in obs1.cov_names: 1567 1568 if e_name not in obs2.cov_names: 1569 continue 1570 1571 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1572 1573 return dvalue 1574 1575 1576def import_jackknife(jacks, name, idl=None): 1577 """Imports jackknife samples and returns an Obs 1578 1579 Parameters 1580 ---------- 1581 jacks : numpy.ndarray 1582 numpy array containing the mean value as zeroth entry and 1583 the N jackknife samples as first to Nth entry. 1584 name : str 1585 name of the ensemble the samples are defined on. 1586 """ 1587 length = len(jacks) - 1 1588 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1589 samples = jacks[1:] @ prj 1590 mean = np.mean(samples) 1591 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1592 new_obs._value = jacks[0] 1593 return new_obs 1594 1595 1596def merge_obs(list_of_obs): 1597 """Combine all observables in list_of_obs into one new observable 1598 1599 Parameters 1600 ---------- 1601 list_of_obs : list 1602 list of the Obs object to be combined 1603 1604 Notes 1605 ----- 1606 It is not possible to combine obs which are based on the same replicum 1607 """ 1608 replist = [item for obs in list_of_obs for item in obs.names] 1609 if (len(replist) == len(set(replist))) is False: 1610 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1611 if any([len(o.cov_names) for o in list_of_obs]): 1612 raise Exception('Not possible to merge data that contains covobs!') 1613 new_dict = {} 1614 idl_dict = {} 1615 for o in list_of_obs: 1616 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1617 for key in set(o.deltas) | set(o.r_values)}) 1618 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1619 1620 names = sorted(new_dict.keys()) 1621 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1622 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1623 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1624 return o 1625 1626 1627def cov_Obs(means, cov, name, grad=None): 1628 """Create an Obs based on mean(s) and a covariance matrix 1629 1630 Parameters 1631 ---------- 1632 mean : list of floats or float 1633 N mean value(s) of the new Obs 1634 cov : list or array 1635 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1636 name : str 1637 identifier for the covariance matrix 1638 grad : list or array 1639 Gradient of the Covobs wrt. the means belonging to cov. 1640 """ 1641 1642 def covobs_to_obs(co): 1643 """Make an Obs out of a Covobs 1644 1645 Parameters 1646 ---------- 1647 co : Covobs 1648 Covobs to be embedded into the Obs 1649 """ 1650 o = Obs([], [], means=[]) 1651 o._value = co.value 1652 o.names.append(co.name) 1653 o._covobs[co.name] = co 1654 o._dvalue = np.sqrt(co.errsq()) 1655 return o 1656 1657 ol = [] 1658 if isinstance(means, (float, int)): 1659 means = [means] 1660 1661 for i in range(len(means)): 1662 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1663 if ol[0].covobs[name].N != len(means): 1664 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1665 if len(ol) == 1: 1666 return ol[0] 1667 return ol
17class Obs: 18 """Class for a general observable. 19 20 Instances of Obs are the basic objects of a pyerrors error analysis. 21 They are initialized with a list which contains arrays of samples for 22 different ensembles/replica and another list of same length which contains 23 the names of the ensembles/replica. Mathematical operations can be 24 performed on instances. The result is another instance of Obs. The error of 25 an instance can be computed with the gamma_method. Also contains additional 26 methods for output and visualization of the error calculation. 27 28 Attributes 29 ---------- 30 S_global : float 31 Standard value for S (default 2.0) 32 S_dict : dict 33 Dictionary for S values. If an entry for a given ensemble 34 exists this overwrites the standard value for that ensemble. 35 tau_exp_global : float 36 Standard value for tau_exp (default 0.0) 37 tau_exp_dict : dict 38 Dictionary for tau_exp values. If an entry for a given ensemble exists 39 this overwrites the standard value for that ensemble. 40 N_sigma_global : float 41 Standard value for N_sigma (default 1.0) 42 N_sigma_dict : dict 43 Dictionary for N_sigma values. If an entry for a given ensemble exists 44 this overwrites the standard value for that ensemble. 45 """ 46 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 47 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 48 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 49 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 50 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 51 52 S_global = 2.0 53 S_dict = {} 54 tau_exp_global = 0.0 55 tau_exp_dict = {} 56 N_sigma_global = 1.0 57 N_sigma_dict = {} 58 filter_eps = 1e-10 59 60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise Exception('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise Exception('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise Exception('names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise Exception('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.is_merged = {} 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise Exception("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise Exception('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None 141 142 @property 143 def value(self): 144 return self._value 145 146 @property 147 def dvalue(self): 148 return self._dvalue 149 150 @property 151 def e_names(self): 152 return sorted(set([o.split('|')[0] for o in self.names])) 153 154 @property 155 def cov_names(self): 156 return sorted(set([o for o in self.covobs.keys()])) 157 158 @property 159 def mc_names(self): 160 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 161 162 @property 163 def e_content(self): 164 res = {} 165 for e, e_name in enumerate(self.e_names): 166 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 167 if e_name in self.names: 168 res[e_name].append(e_name) 169 return res 170 171 @property 172 def covobs(self): 173 return self._covobs 174 175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 272 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 273 # Make sure no entry of tauint is smaller than 0.5 274 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 275 # hep-lat/0306017 eq. (42) 276 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 277 self.e_n_dtauint[e_name][0] = 0.0 278 279 def _compute_drho(i): 280 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 281 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 282 283 _compute_drho(1) 284 if self.tau_exp[e_name] > 0: 285 texp = self.tau_exp[e_name] 286 # Critical slowing down analysis 287 if w_max // 2 <= 1: 288 raise Exception("Need at least 8 samples for tau_exp error analysis") 289 for n in range(1, w_max // 2): 290 _compute_drho(n + 1) 291 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 292 # Bias correction hep-lat/0306017 eq. (49) included 293 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 294 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 295 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 296 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 297 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 298 self.e_windowsize[e_name] = n 299 break 300 else: 301 if self.S[e_name] == 0.0: 302 self.e_tauint[e_name] = 0.5 303 self.e_dtauint[e_name] = 0.0 304 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 305 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 306 self.e_windowsize[e_name] = 0 307 else: 308 # Standard automatic windowing procedure 309 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 310 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 311 for n in range(1, w_max): 312 if n < w_max // 2 - 2: 313 _compute_drho(n + 1) 314 if g_w[n - 1] < 0 or n >= w_max - 1: 315 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 316 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 317 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 318 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 319 self.e_windowsize[e_name] = n 320 break 321 322 self._dvalue += self.e_dvalue[e_name] ** 2 323 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 324 325 for e_name in self.cov_names: 326 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 327 self.e_ddvalue[e_name] = 0 328 self._dvalue += self.e_dvalue[e_name]**2 329 330 self._dvalue = np.sqrt(self._dvalue) 331 if self._dvalue == 0.0: 332 self.ddvalue = 0.0 333 else: 334 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 335 return 336 337 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 338 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 339 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 340 341 Parameters 342 ---------- 343 deltas : list 344 List of fluctuations 345 idx : list 346 List or range of configurations on which the deltas are defined. 347 shape : int 348 Number of configurations in idx. 349 w_max : int 350 Upper bound for the summation window. 351 fft : bool 352 determines whether the fft algorithm is used for the computation 353 of the autocorrelation function. 354 """ 355 gamma = np.zeros(w_max) 356 deltas = _expand_deltas(deltas, idx, shape) 357 new_shape = len(deltas) 358 if fft: 359 max_gamma = min(new_shape, w_max) 360 # The padding for the fft has to be even 361 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 362 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 363 else: 364 for n in range(w_max): 365 if new_shape - n >= 0: 366 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 367 368 return gamma 369 370 def details(self, ens_content=True): 371 """Output detailed properties of the Obs. 372 373 Parameters 374 ---------- 375 ens_content : bool 376 print details about the ensembles and replica if true. 377 """ 378 if self.tag is not None: 379 print("Description:", self.tag) 380 if not hasattr(self, 'e_dvalue'): 381 print('Result\t %3.8e' % (self.value)) 382 else: 383 if self.value == 0.0: 384 percentage = np.nan 385 else: 386 percentage = np.abs(self._dvalue / self.value) * 100 387 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 388 if len(self.e_names) > 1: 389 print(' Ensemble errors:') 390 for e_name in self.mc_names: 391 if len(self.e_names) > 1: 392 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 393 if self.tau_exp[e_name] > 0: 394 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 395 else: 396 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 397 for e_name in self.cov_names: 398 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 399 if ens_content is True: 400 if len(self.e_names) == 1: 401 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 402 else: 403 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 404 my_string_list = [] 405 for key, value in sorted(self.e_content.items()): 406 if key not in self.covobs: 407 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 408 if len(value) == 1: 409 my_string += f': {self.shape[value[0]]} configurations' 410 if isinstance(self.idl[value[0]], range): 411 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 412 else: 413 my_string += ' (irregular range)' 414 else: 415 sublist = [] 416 for v in value: 417 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 418 my_substring += f': {self.shape[v]} configurations' 419 if isinstance(self.idl[v], range): 420 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 421 else: 422 my_substring += ' (irregular range)' 423 sublist.append(my_substring) 424 425 my_string += '\n' + '\n'.join(sublist) 426 else: 427 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 428 my_string_list.append(my_string) 429 print('\n'.join(my_string_list)) 430 431 def reweight(self, weight): 432 """Reweight the obs with given rewighting factors. 433 434 Parameters 435 ---------- 436 weight : Obs 437 Reweighting factor. An Observable that has to be defined on a superset of the 438 configurations in obs[i].idl for all i. 439 all_configs : bool 440 if True, the reweighted observables are normalized by the average of 441 the reweighting factor on all configurations in weight.idl and not 442 on the configurations in obs[i].idl. Default False. 443 """ 444 return reweight(weight, [self])[0] 445 446 def is_zero_within_error(self, sigma=1): 447 """Checks whether the observable is zero within 'sigma' standard errors. 448 449 Parameters 450 ---------- 451 sigma : int 452 Number of standard errors used for the check. 453 454 Works only properly when the gamma method was run. 455 """ 456 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 457 458 def is_zero(self, atol=1e-10): 459 """Checks whether the observable is zero within a given tolerance. 460 461 Parameters 462 ---------- 463 atol : float 464 Absolute tolerance (for details see numpy documentation). 465 """ 466 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 467 468 def plot_tauint(self, save=None): 469 """Plot integrated autocorrelation time for each ensemble. 470 471 Parameters 472 ---------- 473 save : str 474 saves the figure to a file named 'save' if. 475 """ 476 if not hasattr(self, 'e_dvalue'): 477 raise Exception('Run the gamma method first.') 478 479 for e, e_name in enumerate(self.mc_names): 480 fig = plt.figure() 481 plt.xlabel(r'$W$') 482 plt.ylabel(r'$\tau_\mathrm{int}$') 483 length = int(len(self.e_n_tauint[e_name])) 484 if self.tau_exp[e_name] > 0: 485 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 486 x_help = np.arange(2 * self.tau_exp[e_name]) 487 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 488 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 489 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 490 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 491 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 492 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 493 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 494 else: 495 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 496 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 497 498 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 499 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 500 plt.legend() 501 plt.xlim(-0.5, xmax) 502 ylim = plt.ylim() 503 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 504 plt.draw() 505 if save: 506 fig.savefig(save + "_" + str(e)) 507 508 def plot_rho(self, save=None): 509 """Plot normalized autocorrelation function time for each ensemble. 510 511 Parameters 512 ---------- 513 save : str 514 saves the figure to a file named 'save' if. 515 """ 516 if not hasattr(self, 'e_dvalue'): 517 raise Exception('Run the gamma method first.') 518 for e, e_name in enumerate(self.mc_names): 519 fig = plt.figure() 520 plt.xlabel('W') 521 plt.ylabel('rho') 522 length = int(len(self.e_drho[e_name])) 523 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 524 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 525 if self.tau_exp[e_name] > 0: 526 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 527 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 528 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 529 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 530 else: 531 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 532 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 533 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 534 plt.xlim(-0.5, xmax) 535 plt.draw() 536 if save: 537 fig.savefig(save + "_" + str(e)) 538 539 def plot_rep_dist(self): 540 """Plot replica distribution for each ensemble with more than one replicum.""" 541 if not hasattr(self, 'e_dvalue'): 542 raise Exception('Run the gamma method first.') 543 for e, e_name in enumerate(self.mc_names): 544 if len(self.e_content[e_name]) == 1: 545 print('No replica distribution for a single replicum (', e_name, ')') 546 continue 547 r_length = [] 548 sub_r_mean = 0 549 for r, r_name in enumerate(self.e_content[e_name]): 550 r_length.append(len(self.deltas[r_name])) 551 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 552 e_N = np.sum(r_length) 553 sub_r_mean /= e_N 554 arr = np.zeros(len(self.e_content[e_name])) 555 for r, r_name in enumerate(self.e_content[e_name]): 556 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 557 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 558 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 559 plt.draw() 560 561 def plot_history(self, expand=True): 562 """Plot derived Monte Carlo history for each ensemble 563 564 Parameters 565 ---------- 566 expand : bool 567 show expanded history for irregular Monte Carlo chains (default: True). 568 """ 569 for e, e_name in enumerate(self.mc_names): 570 plt.figure() 571 r_length = [] 572 tmp = [] 573 tmp_expanded = [] 574 for r, r_name in enumerate(self.e_content[e_name]): 575 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 576 if expand: 577 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 578 r_length.append(len(tmp_expanded[-1])) 579 else: 580 r_length.append(len(tmp[-1])) 581 e_N = np.sum(r_length) 582 x = np.arange(e_N) 583 y_test = np.concatenate(tmp, axis=0) 584 if expand: 585 y = np.concatenate(tmp_expanded, axis=0) 586 else: 587 y = y_test 588 plt.errorbar(x, y, fmt='.', markersize=3) 589 plt.xlim(-0.5, e_N - 0.5) 590 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 591 plt.draw() 592 593 def plot_piechart(self, save=None): 594 """Plot piechart which shows the fractional contribution of each 595 ensemble to the error and returns a dictionary containing the fractions. 596 597 Parameters 598 ---------- 599 save : str 600 saves the figure to a file named 'save' if. 601 """ 602 if not hasattr(self, 'e_dvalue'): 603 raise Exception('Run the gamma method first.') 604 if np.isclose(0.0, self._dvalue, atol=1e-15): 605 raise Exception('Error is 0.0') 606 labels = self.e_names 607 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 608 fig1, ax1 = plt.subplots() 609 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 610 ax1.axis('equal') 611 plt.draw() 612 if save: 613 fig1.savefig(save) 614 615 return dict(zip(self.e_names, sizes)) 616 617 def dump(self, filename, datatype="json.gz", description="", **kwargs): 618 """Dump the Obs to a file 'name' of chosen format. 619 620 Parameters 621 ---------- 622 filename : str 623 name of the file to be saved. 624 datatype : str 625 Format of the exported file. Supported formats include 626 "json.gz" and "pickle" 627 description : str 628 Description for output file, only relevant for json.gz format. 629 path : str 630 specifies a custom path for the file (default '.') 631 """ 632 if 'path' in kwargs: 633 file_name = kwargs.get('path') + '/' + filename 634 else: 635 file_name = filename 636 637 if datatype == "json.gz": 638 from .input.json import dump_to_json 639 dump_to_json([self], file_name, description=description) 640 elif datatype == "pickle": 641 with open(file_name + '.p', 'wb') as fb: 642 pickle.dump(self, fb) 643 else: 644 raise Exception("Unknown datatype " + str(datatype)) 645 646 def export_jackknife(self): 647 """Export jackknife samples from the Obs 648 649 Returns 650 ------- 651 numpy.ndarray 652 Returns a numpy array of length N + 1 where N is the number of samples 653 for the given ensemble and replicum. The zeroth entry of the array contains 654 the mean value of the Obs, entries 1 to N contain the N jackknife samples 655 derived from the Obs. The current implementation only works for observables 656 defined on exactly one ensemble and replicum. The derived jackknife samples 657 should agree with samples from a full jackknife analysis up to O(1/N). 658 """ 659 660 if len(self.names) != 1: 661 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 662 663 name = self.names[0] 664 full_data = self.deltas[name] + self.r_values[name] 665 n = full_data.size 666 mean = self.value 667 tmp_jacks = np.zeros(n + 1) 668 tmp_jacks[0] = mean 669 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 670 return tmp_jacks 671 672 def __float__(self): 673 return float(self.value) 674 675 def __repr__(self): 676 return 'Obs[' + str(self) + ']' 677 678 def __str__(self): 679 if self._dvalue == 0.0: 680 return str(self.value) 681 fexp = np.floor(np.log10(self._dvalue)) 682 if fexp < 0.0: 683 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 684 elif fexp == 0.0: 685 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 686 else: 687 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 688 689 def __hash__(self): 690 hash_tuple = (np.array([self.value]).astype(np.float32).data.tobytes(),) 691 hash_tuple += tuple([o.astype(np.float32).data.tobytes() for o in self.deltas.values()]) 692 hash_tuple += tuple([np.array([o.errsq()]).astype(np.float32).data.tobytes() for o in self.covobs.values()]) 693 hash_tuple += tuple([o.encode() for o in self.names]) 694 m = hashlib.md5() 695 [m.update(o) for o in hash_tuple] 696 return int(m.hexdigest(), 16) & 0xFFFFFFFF 697 698 # Overload comparisons 699 def __lt__(self, other): 700 return self.value < other 701 702 def __le__(self, other): 703 return self.value <= other 704 705 def __gt__(self, other): 706 return self.value > other 707 708 def __ge__(self, other): 709 return self.value >= other 710 711 def __eq__(self, other): 712 return (self - other).is_zero() 713 714 def __ne__(self, other): 715 return not (self - other).is_zero() 716 717 # Overload math operations 718 def __add__(self, y): 719 if isinstance(y, Obs): 720 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 721 else: 722 if isinstance(y, np.ndarray): 723 return np.array([self + o for o in y]) 724 elif y.__class__.__name__ in ['Corr', 'CObs']: 725 return NotImplemented 726 else: 727 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 728 729 def __radd__(self, y): 730 return self + y 731 732 def __mul__(self, y): 733 if isinstance(y, Obs): 734 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 735 else: 736 if isinstance(y, np.ndarray): 737 return np.array([self * o for o in y]) 738 elif isinstance(y, complex): 739 return CObs(self * y.real, self * y.imag) 740 elif y.__class__.__name__ in ['Corr', 'CObs']: 741 return NotImplemented 742 else: 743 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 744 745 def __rmul__(self, y): 746 return self * y 747 748 def __sub__(self, y): 749 if isinstance(y, Obs): 750 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 751 else: 752 if isinstance(y, np.ndarray): 753 return np.array([self - o for o in y]) 754 elif y.__class__.__name__ in ['Corr', 'CObs']: 755 return NotImplemented 756 else: 757 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 758 759 def __rsub__(self, y): 760 return -1 * (self - y) 761 762 def __pos__(self): 763 return self 764 765 def __neg__(self): 766 return -1 * self 767 768 def __truediv__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([self / o for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 778 779 def __rtruediv__(self, y): 780 if isinstance(y, Obs): 781 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 782 else: 783 if isinstance(y, np.ndarray): 784 return np.array([o / self for o in y]) 785 elif y.__class__.__name__ in ['Corr', 'CObs']: 786 return NotImplemented 787 else: 788 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 789 790 def __pow__(self, y): 791 if isinstance(y, Obs): 792 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 793 else: 794 return derived_observable(lambda x: x[0] ** y, [self]) 795 796 def __rpow__(self, y): 797 if isinstance(y, Obs): 798 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 799 else: 800 return derived_observable(lambda x: y ** x[0], [self]) 801 802 def __abs__(self): 803 return derived_observable(lambda x: anp.abs(x[0]), [self]) 804 805 # Overload numpy functions 806 def sqrt(self): 807 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 808 809 def log(self): 810 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 811 812 def exp(self): 813 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 814 815 def sin(self): 816 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 817 818 def cos(self): 819 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 820 821 def tan(self): 822 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 823 824 def arcsin(self): 825 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 826 827 def arccos(self): 828 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 829 830 def arctan(self): 831 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 832 833 def sinh(self): 834 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 835 836 def cosh(self): 837 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 838 839 def tanh(self): 840 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 841 842 def arcsinh(self): 843 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 844 845 def arccosh(self): 846 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 847 848 def arctanh(self): 849 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
60 def __init__(self, samples, names, idl=None, **kwargs): 61 """ Initialize Obs object. 62 63 Parameters 64 ---------- 65 samples : list 66 list of numpy arrays containing the Monte Carlo samples 67 names : list 68 list of strings labeling the individual samples 69 idl : list, optional 70 list of ranges or lists on which the samples are defined 71 """ 72 73 if kwargs.get("means") is None and len(samples): 74 if len(samples) != len(names): 75 raise Exception('Length of samples and names incompatible.') 76 if idl is not None: 77 if len(idl) != len(names): 78 raise Exception('Length of idl incompatible with samples and names.') 79 name_length = len(names) 80 if name_length > 1: 81 if name_length != len(set(names)): 82 raise Exception('names are not unique.') 83 if not all(isinstance(x, str) for x in names): 84 raise TypeError('All names have to be strings.') 85 else: 86 if not isinstance(names[0], str): 87 raise TypeError('All names have to be strings.') 88 if min(len(x) for x in samples) <= 4: 89 raise Exception('Samples have to have at least 5 entries.') 90 91 self.names = sorted(names) 92 self.shape = {} 93 self.r_values = {} 94 self.deltas = {} 95 self._covobs = {} 96 97 self._value = 0 98 self.N = 0 99 self.is_merged = {} 100 self.idl = {} 101 if idl is not None: 102 for name, idx in sorted(zip(names, idl)): 103 if isinstance(idx, range): 104 self.idl[name] = idx 105 elif isinstance(idx, (list, np.ndarray)): 106 dc = np.unique(np.diff(idx)) 107 if np.any(dc < 0): 108 raise Exception("Unsorted idx for idl[%s]" % (name)) 109 if len(dc) == 1: 110 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 111 else: 112 self.idl[name] = list(idx) 113 else: 114 raise Exception('incompatible type for idl[%s].' % (name)) 115 else: 116 for name, sample in sorted(zip(names, samples)): 117 self.idl[name] = range(1, len(sample) + 1) 118 119 if kwargs.get("means") is not None: 120 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 121 self.shape[name] = len(self.idl[name]) 122 self.N += self.shape[name] 123 self.r_values[name] = mean 124 self.deltas[name] = sample 125 else: 126 for name, sample in sorted(zip(names, samples)): 127 self.shape[name] = len(self.idl[name]) 128 self.N += self.shape[name] 129 if len(sample) != self.shape[name]: 130 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 131 self.r_values[name] = np.mean(sample) 132 self.deltas[name] = sample - self.r_values[name] 133 self._value += self.shape[name] * self.r_values[name] 134 self._value /= self.N 135 136 self._dvalue = 0.0 137 self.ddvalue = 0.0 138 self.reweighted = False 139 140 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
175 def gamma_method(self, **kwargs): 176 """Estimate the error and related properties of the Obs. 177 178 Parameters 179 ---------- 180 S : float 181 specifies a custom value for the parameter S (default 2.0). 182 If set to 0 it is assumed that the data exhibits no 183 autocorrelation. In this case the error estimates coincides 184 with the sample standard error. 185 tau_exp : float 186 positive value triggers the critical slowing down analysis 187 (default 0.0). 188 N_sigma : float 189 number of standard deviations from zero until the tail is 190 attached to the autocorrelation function (default 1). 191 fft : bool 192 determines whether the fft algorithm is used for the computation 193 of the autocorrelation function (default True) 194 """ 195 196 e_content = self.e_content 197 self.e_dvalue = {} 198 self.e_ddvalue = {} 199 self.e_tauint = {} 200 self.e_dtauint = {} 201 self.e_windowsize = {} 202 self.e_n_tauint = {} 203 self.e_n_dtauint = {} 204 e_gamma = {} 205 self.e_rho = {} 206 self.e_drho = {} 207 self._dvalue = 0 208 self.ddvalue = 0 209 210 self.S = {} 211 self.tau_exp = {} 212 self.N_sigma = {} 213 214 if kwargs.get('fft') is False: 215 fft = False 216 else: 217 fft = True 218 219 def _parse_kwarg(kwarg_name): 220 if kwarg_name in kwargs: 221 tmp = kwargs.get(kwarg_name) 222 if isinstance(tmp, (int, float)): 223 if tmp < 0: 224 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 225 for e, e_name in enumerate(self.e_names): 226 getattr(self, kwarg_name)[e_name] = tmp 227 else: 228 raise TypeError(kwarg_name + ' is not in proper format.') 229 else: 230 for e, e_name in enumerate(self.e_names): 231 if e_name in getattr(Obs, kwarg_name + '_dict'): 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 233 else: 234 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 235 236 _parse_kwarg('S') 237 _parse_kwarg('tau_exp') 238 _parse_kwarg('N_sigma') 239 240 for e, e_name in enumerate(self.mc_names): 241 r_length = [] 242 for r_name in e_content[e_name]: 243 if isinstance(self.idl[r_name], range): 244 r_length.append(len(self.idl[r_name])) 245 else: 246 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 247 248 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 249 w_max = max(r_length) // 2 250 e_gamma[e_name] = np.zeros(w_max) 251 self.e_rho[e_name] = np.zeros(w_max) 252 self.e_drho[e_name] = np.zeros(w_max) 253 254 for r_name in e_content[e_name]: 255 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 256 257 gamma_div = np.zeros(w_max) 258 for r_name in e_content[e_name]: 259 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 260 gamma_div[gamma_div < 1] = 1.0 261 e_gamma[e_name] /= gamma_div[:w_max] 262 263 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 264 self.e_tauint[e_name] = 0.5 265 self.e_dtauint[e_name] = 0.0 266 self.e_dvalue[e_name] = 0.0 267 self.e_ddvalue[e_name] = 0.0 268 self.e_windowsize[e_name] = 0 269 continue 270 271 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 272 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 273 # Make sure no entry of tauint is smaller than 0.5 274 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 275 # hep-lat/0306017 eq. (42) 276 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 277 self.e_n_dtauint[e_name][0] = 0.0 278 279 def _compute_drho(i): 280 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 281 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 282 283 _compute_drho(1) 284 if self.tau_exp[e_name] > 0: 285 texp = self.tau_exp[e_name] 286 # Critical slowing down analysis 287 if w_max // 2 <= 1: 288 raise Exception("Need at least 8 samples for tau_exp error analysis") 289 for n in range(1, w_max // 2): 290 _compute_drho(n + 1) 291 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 292 # Bias correction hep-lat/0306017 eq. (49) included 293 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 294 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 295 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 296 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 297 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 298 self.e_windowsize[e_name] = n 299 break 300 else: 301 if self.S[e_name] == 0.0: 302 self.e_tauint[e_name] = 0.5 303 self.e_dtauint[e_name] = 0.0 304 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 305 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 306 self.e_windowsize[e_name] = 0 307 else: 308 # Standard automatic windowing procedure 309 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 310 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 311 for n in range(1, w_max): 312 if n < w_max // 2 - 2: 313 _compute_drho(n + 1) 314 if g_w[n - 1] < 0 or n >= w_max - 1: 315 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 316 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 317 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 318 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 319 self.e_windowsize[e_name] = n 320 break 321 322 self._dvalue += self.e_dvalue[e_name] ** 2 323 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 324 325 for e_name in self.cov_names: 326 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 327 self.e_ddvalue[e_name] = 0 328 self._dvalue += self.e_dvalue[e_name]**2 329 330 self._dvalue = np.sqrt(self._dvalue) 331 if self._dvalue == 0.0: 332 self.ddvalue = 0.0 333 else: 334 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 335 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
370 def details(self, ens_content=True): 371 """Output detailed properties of the Obs. 372 373 Parameters 374 ---------- 375 ens_content : bool 376 print details about the ensembles and replica if true. 377 """ 378 if self.tag is not None: 379 print("Description:", self.tag) 380 if not hasattr(self, 'e_dvalue'): 381 print('Result\t %3.8e' % (self.value)) 382 else: 383 if self.value == 0.0: 384 percentage = np.nan 385 else: 386 percentage = np.abs(self._dvalue / self.value) * 100 387 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 388 if len(self.e_names) > 1: 389 print(' Ensemble errors:') 390 for e_name in self.mc_names: 391 if len(self.e_names) > 1: 392 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 393 if self.tau_exp[e_name] > 0: 394 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 395 else: 396 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 397 for e_name in self.cov_names: 398 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 399 if ens_content is True: 400 if len(self.e_names) == 1: 401 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 402 else: 403 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 404 my_string_list = [] 405 for key, value in sorted(self.e_content.items()): 406 if key not in self.covobs: 407 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 408 if len(value) == 1: 409 my_string += f': {self.shape[value[0]]} configurations' 410 if isinstance(self.idl[value[0]], range): 411 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 412 else: 413 my_string += ' (irregular range)' 414 else: 415 sublist = [] 416 for v in value: 417 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 418 my_substring += f': {self.shape[v]} configurations' 419 if isinstance(self.idl[v], range): 420 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 421 else: 422 my_substring += ' (irregular range)' 423 sublist.append(my_substring) 424 425 my_string += '\n' + '\n'.join(sublist) 426 else: 427 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 428 my_string_list.append(my_string) 429 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
431 def reweight(self, weight): 432 """Reweight the obs with given rewighting factors. 433 434 Parameters 435 ---------- 436 weight : Obs 437 Reweighting factor. An Observable that has to be defined on a superset of the 438 configurations in obs[i].idl for all i. 439 all_configs : bool 440 if True, the reweighted observables are normalized by the average of 441 the reweighting factor on all configurations in weight.idl and not 442 on the configurations in obs[i].idl. Default False. 443 """ 444 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
446 def is_zero_within_error(self, sigma=1): 447 """Checks whether the observable is zero within 'sigma' standard errors. 448 449 Parameters 450 ---------- 451 sigma : int 452 Number of standard errors used for the check. 453 454 Works only properly when the gamma method was run. 455 """ 456 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
458 def is_zero(self, atol=1e-10): 459 """Checks whether the observable is zero within a given tolerance. 460 461 Parameters 462 ---------- 463 atol : float 464 Absolute tolerance (for details see numpy documentation). 465 """ 466 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
468 def plot_tauint(self, save=None): 469 """Plot integrated autocorrelation time for each ensemble. 470 471 Parameters 472 ---------- 473 save : str 474 saves the figure to a file named 'save' if. 475 """ 476 if not hasattr(self, 'e_dvalue'): 477 raise Exception('Run the gamma method first.') 478 479 for e, e_name in enumerate(self.mc_names): 480 fig = plt.figure() 481 plt.xlabel(r'$W$') 482 plt.ylabel(r'$\tau_\mathrm{int}$') 483 length = int(len(self.e_n_tauint[e_name])) 484 if self.tau_exp[e_name] > 0: 485 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 486 x_help = np.arange(2 * self.tau_exp[e_name]) 487 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 488 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 489 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 490 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 491 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 492 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 493 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 494 else: 495 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 496 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 497 498 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 499 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 500 plt.legend() 501 plt.xlim(-0.5, xmax) 502 ylim = plt.ylim() 503 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 504 plt.draw() 505 if save: 506 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
508 def plot_rho(self, save=None): 509 """Plot normalized autocorrelation function time for each ensemble. 510 511 Parameters 512 ---------- 513 save : str 514 saves the figure to a file named 'save' if. 515 """ 516 if not hasattr(self, 'e_dvalue'): 517 raise Exception('Run the gamma method first.') 518 for e, e_name in enumerate(self.mc_names): 519 fig = plt.figure() 520 plt.xlabel('W') 521 plt.ylabel('rho') 522 length = int(len(self.e_drho[e_name])) 523 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 524 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 525 if self.tau_exp[e_name] > 0: 526 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 527 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 528 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 529 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 530 else: 531 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 532 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 533 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 534 plt.xlim(-0.5, xmax) 535 plt.draw() 536 if save: 537 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
539 def plot_rep_dist(self): 540 """Plot replica distribution for each ensemble with more than one replicum.""" 541 if not hasattr(self, 'e_dvalue'): 542 raise Exception('Run the gamma method first.') 543 for e, e_name in enumerate(self.mc_names): 544 if len(self.e_content[e_name]) == 1: 545 print('No replica distribution for a single replicum (', e_name, ')') 546 continue 547 r_length = [] 548 sub_r_mean = 0 549 for r, r_name in enumerate(self.e_content[e_name]): 550 r_length.append(len(self.deltas[r_name])) 551 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 552 e_N = np.sum(r_length) 553 sub_r_mean /= e_N 554 arr = np.zeros(len(self.e_content[e_name])) 555 for r, r_name in enumerate(self.e_content[e_name]): 556 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 557 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 558 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 559 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
561 def plot_history(self, expand=True): 562 """Plot derived Monte Carlo history for each ensemble 563 564 Parameters 565 ---------- 566 expand : bool 567 show expanded history for irregular Monte Carlo chains (default: True). 568 """ 569 for e, e_name in enumerate(self.mc_names): 570 plt.figure() 571 r_length = [] 572 tmp = [] 573 tmp_expanded = [] 574 for r, r_name in enumerate(self.e_content[e_name]): 575 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 576 if expand: 577 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 578 r_length.append(len(tmp_expanded[-1])) 579 else: 580 r_length.append(len(tmp[-1])) 581 e_N = np.sum(r_length) 582 x = np.arange(e_N) 583 y_test = np.concatenate(tmp, axis=0) 584 if expand: 585 y = np.concatenate(tmp_expanded, axis=0) 586 else: 587 y = y_test 588 plt.errorbar(x, y, fmt='.', markersize=3) 589 plt.xlim(-0.5, e_N - 0.5) 590 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 591 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
593 def plot_piechart(self, save=None): 594 """Plot piechart which shows the fractional contribution of each 595 ensemble to the error and returns a dictionary containing the fractions. 596 597 Parameters 598 ---------- 599 save : str 600 saves the figure to a file named 'save' if. 601 """ 602 if not hasattr(self, 'e_dvalue'): 603 raise Exception('Run the gamma method first.') 604 if np.isclose(0.0, self._dvalue, atol=1e-15): 605 raise Exception('Error is 0.0') 606 labels = self.e_names 607 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 608 fig1, ax1 = plt.subplots() 609 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 610 ax1.axis('equal') 611 plt.draw() 612 if save: 613 fig1.savefig(save) 614 615 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
617 def dump(self, filename, datatype="json.gz", description="", **kwargs): 618 """Dump the Obs to a file 'name' of chosen format. 619 620 Parameters 621 ---------- 622 filename : str 623 name of the file to be saved. 624 datatype : str 625 Format of the exported file. Supported formats include 626 "json.gz" and "pickle" 627 description : str 628 Description for output file, only relevant for json.gz format. 629 path : str 630 specifies a custom path for the file (default '.') 631 """ 632 if 'path' in kwargs: 633 file_name = kwargs.get('path') + '/' + filename 634 else: 635 file_name = filename 636 637 if datatype == "json.gz": 638 from .input.json import dump_to_json 639 dump_to_json([self], file_name, description=description) 640 elif datatype == "pickle": 641 with open(file_name + '.p', 'wb') as fb: 642 pickle.dump(self, fb) 643 else: 644 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
646 def export_jackknife(self): 647 """Export jackknife samples from the Obs 648 649 Returns 650 ------- 651 numpy.ndarray 652 Returns a numpy array of length N + 1 where N is the number of samples 653 for the given ensemble and replicum. The zeroth entry of the array contains 654 the mean value of the Obs, entries 1 to N contain the N jackknife samples 655 derived from the Obs. The current implementation only works for observables 656 defined on exactly one ensemble and replicum. The derived jackknife samples 657 should agree with samples from a full jackknife analysis up to O(1/N). 658 """ 659 660 if len(self.names) != 1: 661 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 662 663 name = self.names[0] 664 full_data = self.deltas[name] + self.r_values[name] 665 n = full_data.size 666 mean = self.value 667 tmp_jacks = np.zeros(n + 1) 668 tmp_jacks[0] = mean 669 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 670 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
852class CObs: 853 """Class for a complex valued observable.""" 854 __slots__ = ['_real', '_imag', 'tag'] 855 856 def __init__(self, real, imag=0.0): 857 self._real = real 858 self._imag = imag 859 self.tag = None 860 861 @property 862 def real(self): 863 return self._real 864 865 @property 866 def imag(self): 867 return self._imag 868 869 def gamma_method(self, **kwargs): 870 """Executes the gamma_method for the real and the imaginary part.""" 871 if isinstance(self.real, Obs): 872 self.real.gamma_method(**kwargs) 873 if isinstance(self.imag, Obs): 874 self.imag.gamma_method(**kwargs) 875 876 def is_zero(self): 877 """Checks whether both real and imaginary part are zero within machine precision.""" 878 return self.real == 0.0 and self.imag == 0.0 879 880 def conjugate(self): 881 return CObs(self.real, -self.imag) 882 883 def __add__(self, other): 884 if isinstance(other, np.ndarray): 885 return other + self 886 elif hasattr(other, 'real') and hasattr(other, 'imag'): 887 return CObs(self.real + other.real, 888 self.imag + other.imag) 889 else: 890 return CObs(self.real + other, self.imag) 891 892 def __radd__(self, y): 893 return self + y 894 895 def __sub__(self, other): 896 if isinstance(other, np.ndarray): 897 return -1 * (other - self) 898 elif hasattr(other, 'real') and hasattr(other, 'imag'): 899 return CObs(self.real - other.real, self.imag - other.imag) 900 else: 901 return CObs(self.real - other, self.imag) 902 903 def __rsub__(self, other): 904 return -1 * (self - other) 905 906 def __mul__(self, other): 907 if isinstance(other, np.ndarray): 908 return other * self 909 elif hasattr(other, 'real') and hasattr(other, 'imag'): 910 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 911 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 912 [self.real, other.real, self.imag, other.imag], 913 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 914 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 915 [self.real, other.real, self.imag, other.imag], 916 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 917 elif getattr(other, 'imag', 0) != 0: 918 return CObs(self.real * other.real - self.imag * other.imag, 919 self.imag * other.real + self.real * other.imag) 920 else: 921 return CObs(self.real * other.real, self.imag * other.real) 922 else: 923 return CObs(self.real * other, self.imag * other) 924 925 def __rmul__(self, other): 926 return self * other 927 928 def __truediv__(self, other): 929 if isinstance(other, np.ndarray): 930 return 1 / (other / self) 931 elif hasattr(other, 'real') and hasattr(other, 'imag'): 932 r = other.real ** 2 + other.imag ** 2 933 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 934 else: 935 return CObs(self.real / other, self.imag / other) 936 937 def __rtruediv__(self, other): 938 r = self.real ** 2 + self.imag ** 2 939 if hasattr(other, 'real') and hasattr(other, 'imag'): 940 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 941 else: 942 return CObs(self.real * other / r, -self.imag * other / r) 943 944 def __abs__(self): 945 return np.sqrt(self.real**2 + self.imag**2) 946 947 def __pos__(self): 948 return self 949 950 def __neg__(self): 951 return -1 * self 952 953 def __eq__(self, other): 954 return self.real == other.real and self.imag == other.imag 955 956 def __str__(self): 957 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 958 959 def __repr__(self): 960 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
869 def gamma_method(self, **kwargs): 870 """Executes the gamma_method for the real and the imaginary part.""" 871 if isinstance(self.real, Obs): 872 self.real.gamma_method(**kwargs) 873 if isinstance(self.imag, Obs): 874 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1129def derived_observable(func, data, array_mode=False, **kwargs): 1130 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1131 1132 Parameters 1133 ---------- 1134 func : object 1135 arbitrary function of the form func(data, **kwargs). For the 1136 automatic differentiation to work, all numpy functions have to have 1137 the autograd wrapper (use 'import autograd.numpy as anp'). 1138 data : list 1139 list of Obs, e.g. [obs1, obs2, obs3]. 1140 num_grad : bool 1141 if True, numerical derivatives are used instead of autograd 1142 (default False). To control the numerical differentiation the 1143 kwargs of numdifftools.step_generators.MaxStepGenerator 1144 can be used. 1145 man_grad : list 1146 manually supply a list or an array which contains the jacobian 1147 of func. Use cautiously, supplying the wrong derivative will 1148 not be intercepted. 1149 1150 Notes 1151 ----- 1152 For simple mathematical operations it can be practical to use anonymous 1153 functions. For the ratio of two observables one can e.g. use 1154 1155 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1156 """ 1157 1158 data = np.asarray(data) 1159 raveled_data = data.ravel() 1160 1161 # Workaround for matrix operations containing non Obs data 1162 if not all(isinstance(x, Obs) for x in raveled_data): 1163 for i in range(len(raveled_data)): 1164 if isinstance(raveled_data[i], (int, float)): 1165 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1166 1167 allcov = {} 1168 for o in raveled_data: 1169 for name in o.cov_names: 1170 if name in allcov: 1171 if not np.allclose(allcov[name], o.covobs[name].cov): 1172 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1173 else: 1174 allcov[name] = o.covobs[name].cov 1175 1176 n_obs = len(raveled_data) 1177 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1178 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1179 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1180 1181 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1182 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1183 1184 if data.ndim == 1: 1185 values = np.array([o.value for o in data]) 1186 else: 1187 values = np.vectorize(lambda x: x.value)(data) 1188 1189 new_values = func(values, **kwargs) 1190 1191 multi = int(isinstance(new_values, np.ndarray)) 1192 1193 new_r_values = {} 1194 new_idl_d = {} 1195 for name in new_sample_names: 1196 idl = [] 1197 tmp_values = np.zeros(n_obs) 1198 for i, item in enumerate(raveled_data): 1199 tmp_values[i] = item.r_values.get(name, item.value) 1200 tmp_idl = item.idl.get(name) 1201 if tmp_idl is not None: 1202 idl.append(tmp_idl) 1203 if multi > 0: 1204 tmp_values = np.array(tmp_values).reshape(data.shape) 1205 new_r_values[name] = func(tmp_values, **kwargs) 1206 new_idl_d[name] = _merge_idx(idl) 1207 if not is_merged[name]: 1208 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1209 1210 if 'man_grad' in kwargs: 1211 deriv = np.asarray(kwargs.get('man_grad')) 1212 if new_values.shape + data.shape != deriv.shape: 1213 raise Exception('Manual derivative does not have correct shape.') 1214 elif kwargs.get('num_grad') is True: 1215 if multi > 0: 1216 raise Exception('Multi mode currently not supported for numerical derivative') 1217 options = { 1218 'base_step': 0.1, 1219 'step_ratio': 2.5} 1220 for key in options.keys(): 1221 kwarg = kwargs.get(key) 1222 if kwarg is not None: 1223 options[key] = kwarg 1224 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1225 if tmp_df.size == 1: 1226 deriv = np.array([tmp_df.real]) 1227 else: 1228 deriv = tmp_df.real 1229 else: 1230 deriv = jacobian(func)(values, **kwargs) 1231 1232 final_result = np.zeros(new_values.shape, dtype=object) 1233 1234 if array_mode is True: 1235 1236 class _Zero_grad(): 1237 def __init__(self, N): 1238 self.grad = np.zeros((N, 1)) 1239 1240 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1241 d_extracted = {} 1242 g_extracted = {} 1243 for name in new_sample_names: 1244 d_extracted[name] = [] 1245 ens_length = len(new_idl_d[name]) 1246 for i_dat, dat in enumerate(data): 1247 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1248 for name in new_cov_names: 1249 g_extracted[name] = [] 1250 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1251 for i_dat, dat in enumerate(data): 1252 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1253 1254 for i_val, new_val in np.ndenumerate(new_values): 1255 new_deltas = {} 1256 new_grad = {} 1257 if array_mode is True: 1258 for name in new_sample_names: 1259 ens_length = d_extracted[name][0].shape[-1] 1260 new_deltas[name] = np.zeros(ens_length) 1261 for i_dat, dat in enumerate(d_extracted[name]): 1262 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1263 for name in new_cov_names: 1264 new_grad[name] = 0 1265 for i_dat, dat in enumerate(g_extracted[name]): 1266 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1267 else: 1268 for j_obs, obs in np.ndenumerate(data): 1269 for name in obs.names: 1270 if name in obs.cov_names: 1271 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1272 else: 1273 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1274 1275 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1276 1277 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1278 raise Exception('The same name has been used for deltas and covobs!') 1279 new_samples = [] 1280 new_means = [] 1281 new_idl = [] 1282 new_names_obs = [] 1283 for name in new_names: 1284 if name not in new_covobs: 1285 if is_merged[name]: 1286 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1287 else: 1288 filtered_deltas = new_deltas[name] 1289 filtered_idl_d = new_idl_d[name] 1290 1291 new_samples.append(filtered_deltas) 1292 new_idl.append(filtered_idl_d) 1293 new_means.append(new_r_values[name][i_val]) 1294 new_names_obs.append(name) 1295 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1296 for name in new_covobs: 1297 final_result[i_val].names.append(name) 1298 final_result[i_val]._covobs = new_covobs 1299 final_result[i_val]._value = new_val 1300 final_result[i_val].is_merged = is_merged 1301 final_result[i_val].reweighted = reweighted 1302 1303 if multi == 0: 1304 final_result = final_result.item() 1305 1306 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1346def reweight(weight, obs, **kwargs): 1347 """Reweight a list of observables. 1348 1349 Parameters 1350 ---------- 1351 weight : Obs 1352 Reweighting factor. An Observable that has to be defined on a superset of the 1353 configurations in obs[i].idl for all i. 1354 obs : list 1355 list of Obs, e.g. [obs1, obs2, obs3]. 1356 all_configs : bool 1357 if True, the reweighted observables are normalized by the average of 1358 the reweighting factor on all configurations in weight.idl and not 1359 on the configurations in obs[i].idl. Default False. 1360 """ 1361 result = [] 1362 for i in range(len(obs)): 1363 if len(obs[i].cov_names): 1364 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1365 if not set(obs[i].names).issubset(weight.names): 1366 raise Exception('Error: Ensembles do not fit') 1367 for name in obs[i].names: 1368 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1369 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1370 new_samples = [] 1371 w_deltas = {} 1372 for name in sorted(obs[i].names): 1373 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1374 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1375 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1376 1377 if kwargs.get('all_configs'): 1378 new_weight = weight 1379 else: 1380 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1381 1382 result.append(tmp_obs / new_weight) 1383 result[-1].reweighted = True 1384 result[-1].is_merged = obs[i].is_merged 1385 1386 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1389def correlate(obs_a, obs_b): 1390 """Correlate two observables. 1391 1392 Parameters 1393 ---------- 1394 obs_a : Obs 1395 First observable 1396 obs_b : Obs 1397 Second observable 1398 1399 Notes 1400 ----- 1401 Keep in mind to only correlate primary observables which have not been reweighted 1402 yet. The reweighting has to be applied after correlating the observables. 1403 Currently only works if ensembles are identical (this is not strictly necessary). 1404 """ 1405 1406 if sorted(obs_a.names) != sorted(obs_b.names): 1407 raise Exception('Ensembles do not fit') 1408 if len(obs_a.cov_names) or len(obs_b.cov_names): 1409 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1410 for name in obs_a.names: 1411 if obs_a.shape[name] != obs_b.shape[name]: 1412 raise Exception('Shapes of ensemble', name, 'do not fit') 1413 if obs_a.idl[name] != obs_b.idl[name]: 1414 raise Exception('idl of ensemble', name, 'do not fit') 1415 1416 if obs_a.reweighted is True: 1417 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1418 if obs_b.reweighted is True: 1419 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1420 1421 new_samples = [] 1422 new_idl = [] 1423 for name in sorted(obs_a.names): 1424 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1425 new_idl.append(obs_a.idl[name]) 1426 1427 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1428 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1429 o.reweighted = obs_a.reweighted or obs_b.reweighted 1430 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1433def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1434 r'''Calculates the error covariance matrix of a set of observables. 1435 1436 The gamma method has to be applied first to all observables. 1437 1438 Parameters 1439 ---------- 1440 obs : list or numpy.ndarray 1441 List or one dimensional array of Obs 1442 visualize : bool 1443 If True plots the corresponding normalized correlation matrix (default False). 1444 correlation : bool 1445 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1446 smooth : None or int 1447 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1448 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1449 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1450 small ones. 1451 1452 Notes 1453 ----- 1454 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1455 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1456 in the absence of autocorrelation. 1457 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1458 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1459 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1460 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1461 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1462 ''' 1463 1464 length = len(obs) 1465 1466 max_samples = np.max([o.N for o in obs]) 1467 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1468 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1469 1470 cov = np.zeros((length, length)) 1471 for i in range(length): 1472 for j in range(i, length): 1473 cov[i, j] = _covariance_element(obs[i], obs[j]) 1474 cov = cov + cov.T - np.diag(np.diag(cov)) 1475 1476 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1477 1478 if isinstance(smooth, int): 1479 corr = _smooth_eigenvalues(corr, smooth) 1480 1481 if visualize: 1482 plt.matshow(corr, vmin=-1, vmax=1) 1483 plt.set_cmap('RdBu') 1484 plt.colorbar() 1485 plt.draw() 1486 1487 if correlation is True: 1488 return corr 1489 1490 errors = [o.dvalue for o in obs] 1491 cov = np.diag(errors) @ corr @ np.diag(errors) 1492 1493 eigenvalues = np.linalg.eigh(cov)[0] 1494 if not np.all(eigenvalues >= 0): 1495 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1496 1497 return cov
Calculates the error covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1577def import_jackknife(jacks, name, idl=None): 1578 """Imports jackknife samples and returns an Obs 1579 1580 Parameters 1581 ---------- 1582 jacks : numpy.ndarray 1583 numpy array containing the mean value as zeroth entry and 1584 the N jackknife samples as first to Nth entry. 1585 name : str 1586 name of the ensemble the samples are defined on. 1587 """ 1588 length = len(jacks) - 1 1589 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1590 samples = jacks[1:] @ prj 1591 mean = np.mean(samples) 1592 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1593 new_obs._value = jacks[0] 1594 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1597def merge_obs(list_of_obs): 1598 """Combine all observables in list_of_obs into one new observable 1599 1600 Parameters 1601 ---------- 1602 list_of_obs : list 1603 list of the Obs object to be combined 1604 1605 Notes 1606 ----- 1607 It is not possible to combine obs which are based on the same replicum 1608 """ 1609 replist = [item for obs in list_of_obs for item in obs.names] 1610 if (len(replist) == len(set(replist))) is False: 1611 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1612 if any([len(o.cov_names) for o in list_of_obs]): 1613 raise Exception('Not possible to merge data that contains covobs!') 1614 new_dict = {} 1615 idl_dict = {} 1616 for o in list_of_obs: 1617 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1618 for key in set(o.deltas) | set(o.r_values)}) 1619 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1620 1621 names = sorted(new_dict.keys()) 1622 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1623 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1624 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1625 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1628def cov_Obs(means, cov, name, grad=None): 1629 """Create an Obs based on mean(s) and a covariance matrix 1630 1631 Parameters 1632 ---------- 1633 mean : list of floats or float 1634 N mean value(s) of the new Obs 1635 cov : list or array 1636 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1637 name : str 1638 identifier for the covariance matrix 1639 grad : list or array 1640 Gradient of the Covobs wrt. the means belonging to cov. 1641 """ 1642 1643 def covobs_to_obs(co): 1644 """Make an Obs out of a Covobs 1645 1646 Parameters 1647 ---------- 1648 co : Covobs 1649 Covobs to be embedded into the Obs 1650 """ 1651 o = Obs([], [], means=[]) 1652 o._value = co.value 1653 o.names.append(co.name) 1654 o._covobs[co.name] = co 1655 o._dvalue = np.sqrt(co.errsq()) 1656 return o 1657 1658 ol = [] 1659 if isinstance(means, (float, int)): 1660 means = [means] 1661 1662 for i in range(len(means)): 1663 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1664 if ol[0].covobs[name].N != len(means): 1665 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1666 if len(ol) == 1: 1667 return ol[0] 1668 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.