pyerrors.obs
1import warnings 2import pickle 3from math import gcd 4from functools import reduce 5import numpy as np 6import autograd.numpy as anp # Thinly-wrapped numpy 7from autograd import jacobian 8import matplotlib.pyplot as plt 9from scipy.stats import skew, skewtest, kurtosis, kurtosistest 10import numdifftools as nd 11from itertools import groupby 12from .covobs import Covobs 13 14 15class Obs: 16 """Class for a general observable. 17 18 Instances of Obs are the basic objects of a pyerrors error analysis. 19 They are initialized with a list which contains arrays of samples for 20 different ensembles/replica and another list of same length which contains 21 the names of the ensembles/replica. Mathematical operations can be 22 performed on instances. The result is another instance of Obs. The error of 23 an instance can be computed with the gamma_method. Also contains additional 24 methods for output and visualization of the error calculation. 25 26 Attributes 27 ---------- 28 S_global : float 29 Standard value for S (default 2.0) 30 S_dict : dict 31 Dictionary for S values. If an entry for a given ensemble 32 exists this overwrites the standard value for that ensemble. 33 tau_exp_global : float 34 Standard value for tau_exp (default 0.0) 35 tau_exp_dict : dict 36 Dictionary for tau_exp values. If an entry for a given ensemble exists 37 this overwrites the standard value for that ensemble. 38 N_sigma_global : float 39 Standard value for N_sigma (default 1.0) 40 N_sigma_dict : dict 41 Dictionary for N_sigma values. If an entry for a given ensemble exists 42 this overwrites the standard value for that ensemble. 43 """ 44 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 45 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 46 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 47 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 48 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 49 50 S_global = 2.0 51 S_dict = {} 52 tau_exp_global = 0.0 53 tau_exp_dict = {} 54 N_sigma_global = 1.0 55 N_sigma_dict = {} 56 filter_eps = 1e-10 57 58 def __init__(self, samples, names, idl=None, **kwargs): 59 """ Initialize Obs object. 60 61 Parameters 62 ---------- 63 samples : list 64 list of numpy arrays containing the Monte Carlo samples 65 names : list 66 list of strings labeling the individual samples 67 idl : list, optional 68 list of ranges or lists on which the samples are defined 69 """ 70 71 if kwargs.get("means") is None and len(samples): 72 if len(samples) != len(names): 73 raise Exception('Length of samples and names incompatible.') 74 if idl is not None: 75 if len(idl) != len(names): 76 raise Exception('Length of idl incompatible with samples and names.') 77 name_length = len(names) 78 if name_length > 1: 79 if name_length != len(set(names)): 80 raise Exception('names are not unique.') 81 if not all(isinstance(x, str) for x in names): 82 raise TypeError('All names have to be strings.') 83 else: 84 if not isinstance(names[0], str): 85 raise TypeError('All names have to be strings.') 86 if min(len(x) for x in samples) <= 4: 87 raise Exception('Samples have to have at least 5 entries.') 88 89 self.names = sorted(names) 90 self.shape = {} 91 self.r_values = {} 92 self.deltas = {} 93 self._covobs = {} 94 95 self._value = 0 96 self.N = 0 97 self.is_merged = {} 98 self.idl = {} 99 if idl is not None: 100 for name, idx in sorted(zip(names, idl)): 101 if isinstance(idx, range): 102 self.idl[name] = idx 103 elif isinstance(idx, (list, np.ndarray)): 104 dc = np.unique(np.diff(idx)) 105 if np.any(dc < 0): 106 raise Exception("Unsorted idx for idl[%s]" % (name)) 107 if len(dc) == 1: 108 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 109 else: 110 self.idl[name] = list(idx) 111 else: 112 raise Exception('incompatible type for idl[%s].' % (name)) 113 else: 114 for name, sample in sorted(zip(names, samples)): 115 self.idl[name] = range(1, len(sample) + 1) 116 117 if kwargs.get("means") is not None: 118 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 119 self.shape[name] = len(self.idl[name]) 120 self.N += self.shape[name] 121 self.r_values[name] = mean 122 self.deltas[name] = sample 123 else: 124 for name, sample in sorted(zip(names, samples)): 125 self.shape[name] = len(self.idl[name]) 126 self.N += self.shape[name] 127 if len(sample) != self.shape[name]: 128 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 129 self.r_values[name] = np.mean(sample) 130 self.deltas[name] = sample - self.r_values[name] 131 self._value += self.shape[name] * self.r_values[name] 132 self._value /= self.N 133 134 self._dvalue = 0.0 135 self.ddvalue = 0.0 136 self.reweighted = False 137 138 self.tag = None 139 140 @property 141 def value(self): 142 return self._value 143 144 @property 145 def dvalue(self): 146 return self._dvalue 147 148 @property 149 def e_names(self): 150 return sorted(set([o.split('|')[0] for o in self.names])) 151 152 @property 153 def cov_names(self): 154 return sorted(set([o for o in self.covobs.keys()])) 155 156 @property 157 def mc_names(self): 158 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 159 160 @property 161 def e_content(self): 162 res = {} 163 for e, e_name in enumerate(self.e_names): 164 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 165 if e_name in self.names: 166 res[e_name].append(e_name) 167 return res 168 169 @property 170 def covobs(self): 171 return self._covobs 172 173 def gamma_method(self, **kwargs): 174 """Estimate the error and related properties of the Obs. 175 176 Parameters 177 ---------- 178 S : float 179 specifies a custom value for the parameter S (default 2.0). 180 If set to 0 it is assumed that the data exhibits no 181 autocorrelation. In this case the error estimates coincides 182 with the sample standard error. 183 tau_exp : float 184 positive value triggers the critical slowing down analysis 185 (default 0.0). 186 N_sigma : float 187 number of standard deviations from zero until the tail is 188 attached to the autocorrelation function (default 1). 189 fft : bool 190 determines whether the fft algorithm is used for the computation 191 of the autocorrelation function (default True) 192 """ 193 194 e_content = self.e_content 195 self.e_dvalue = {} 196 self.e_ddvalue = {} 197 self.e_tauint = {} 198 self.e_dtauint = {} 199 self.e_windowsize = {} 200 self.e_n_tauint = {} 201 self.e_n_dtauint = {} 202 e_gamma = {} 203 self.e_rho = {} 204 self.e_drho = {} 205 self._dvalue = 0 206 self.ddvalue = 0 207 208 self.S = {} 209 self.tau_exp = {} 210 self.N_sigma = {} 211 212 if kwargs.get('fft') is False: 213 fft = False 214 else: 215 fft = True 216 217 def _parse_kwarg(kwarg_name): 218 if kwarg_name in kwargs: 219 tmp = kwargs.get(kwarg_name) 220 if isinstance(tmp, (int, float)): 221 if tmp < 0: 222 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 223 for e, e_name in enumerate(self.e_names): 224 getattr(self, kwarg_name)[e_name] = tmp 225 else: 226 raise TypeError(kwarg_name + ' is not in proper format.') 227 else: 228 for e, e_name in enumerate(self.e_names): 229 if e_name in getattr(Obs, kwarg_name + '_dict'): 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 231 else: 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 233 234 _parse_kwarg('S') 235 _parse_kwarg('tau_exp') 236 _parse_kwarg('N_sigma') 237 238 for e, e_name in enumerate(self.mc_names): 239 r_length = [] 240 for r_name in e_content[e_name]: 241 if isinstance(self.idl[r_name], range): 242 r_length.append(len(self.idl[r_name])) 243 else: 244 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 245 246 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 247 w_max = max(r_length) // 2 248 e_gamma[e_name] = np.zeros(w_max) 249 self.e_rho[e_name] = np.zeros(w_max) 250 self.e_drho[e_name] = np.zeros(w_max) 251 252 for r_name in e_content[e_name]: 253 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 254 255 gamma_div = np.zeros(w_max) 256 for r_name in e_content[e_name]: 257 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 258 gamma_div[gamma_div < 1] = 1.0 259 e_gamma[e_name] /= gamma_div[:w_max] 260 261 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 262 self.e_tauint[e_name] = 0.5 263 self.e_dtauint[e_name] = 0.0 264 self.e_dvalue[e_name] = 0.0 265 self.e_ddvalue[e_name] = 0.0 266 self.e_windowsize[e_name] = 0 267 continue 268 269 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 270 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 271 # Make sure no entry of tauint is smaller than 0.5 272 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 273 # hep-lat/0306017 eq. (42) 274 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 275 self.e_n_dtauint[e_name][0] = 0.0 276 277 def _compute_drho(i): 278 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 279 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 280 281 _compute_drho(1) 282 if self.tau_exp[e_name] > 0: 283 texp = self.tau_exp[e_name] 284 # Critical slowing down analysis 285 if w_max // 2 <= 1: 286 raise Exception("Need at least 8 samples for tau_exp error analysis") 287 for n in range(1, w_max // 2): 288 _compute_drho(n + 1) 289 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 290 # Bias correction hep-lat/0306017 eq. (49) included 291 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 292 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 293 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 294 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 295 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 296 self.e_windowsize[e_name] = n 297 break 298 else: 299 if self.S[e_name] == 0.0: 300 self.e_tauint[e_name] = 0.5 301 self.e_dtauint[e_name] = 0.0 302 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 303 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 304 self.e_windowsize[e_name] = 0 305 else: 306 # Standard automatic windowing procedure 307 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 308 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 309 for n in range(1, w_max): 310 if n < w_max // 2 - 2: 311 _compute_drho(n + 1) 312 if g_w[n - 1] < 0 or n >= w_max - 1: 313 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 314 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 315 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 316 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 317 self.e_windowsize[e_name] = n 318 break 319 320 self._dvalue += self.e_dvalue[e_name] ** 2 321 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 322 323 for e_name in self.cov_names: 324 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 325 self.e_ddvalue[e_name] = 0 326 self._dvalue += self.e_dvalue[e_name]**2 327 328 self._dvalue = np.sqrt(self._dvalue) 329 if self._dvalue == 0.0: 330 self.ddvalue = 0.0 331 else: 332 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 333 return 334 335 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 336 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 337 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 338 339 Parameters 340 ---------- 341 deltas : list 342 List of fluctuations 343 idx : list 344 List or range of configurations on which the deltas are defined. 345 shape : int 346 Number of configurations in idx. 347 w_max : int 348 Upper bound for the summation window. 349 fft : bool 350 determines whether the fft algorithm is used for the computation 351 of the autocorrelation function. 352 """ 353 gamma = np.zeros(w_max) 354 deltas = _expand_deltas(deltas, idx, shape) 355 new_shape = len(deltas) 356 if fft: 357 max_gamma = min(new_shape, w_max) 358 # The padding for the fft has to be even 359 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 360 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 361 else: 362 for n in range(w_max): 363 if new_shape - n >= 0: 364 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 365 366 return gamma 367 368 def details(self, ens_content=True): 369 """Output detailed properties of the Obs. 370 371 Parameters 372 ---------- 373 ens_content : bool 374 print details about the ensembles and replica if true. 375 """ 376 if self.tag is not None: 377 print("Description:", self.tag) 378 if not hasattr(self, 'e_dvalue'): 379 print('Result\t %3.8e' % (self.value)) 380 else: 381 if self.value == 0.0: 382 percentage = np.nan 383 else: 384 percentage = np.abs(self._dvalue / self.value) * 100 385 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 386 if len(self.e_names) > 1: 387 print(' Ensemble errors:') 388 for e_name in self.mc_names: 389 if len(self.e_names) > 1: 390 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 391 if self.tau_exp[e_name] > 0: 392 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 393 else: 394 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 395 for e_name in self.cov_names: 396 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 397 if ens_content is True: 398 if len(self.e_names) == 1: 399 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 400 else: 401 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 402 my_string_list = [] 403 for key, value in sorted(self.e_content.items()): 404 if key not in self.covobs: 405 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 406 if len(value) == 1: 407 my_string += f': {self.shape[value[0]]} configurations' 408 if isinstance(self.idl[value[0]], range): 409 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 410 else: 411 my_string += ' (irregular range)' 412 else: 413 sublist = [] 414 for v in value: 415 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 416 my_substring += f': {self.shape[v]} configurations' 417 if isinstance(self.idl[v], range): 418 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 419 else: 420 my_substring += ' (irregular range)' 421 sublist.append(my_substring) 422 423 my_string += '\n' + '\n'.join(sublist) 424 else: 425 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 426 my_string_list.append(my_string) 427 print('\n'.join(my_string_list)) 428 429 def is_zero_within_error(self, sigma=1): 430 """Checks whether the observable is zero within 'sigma' standard errors. 431 432 Parameters 433 ---------- 434 sigma : int 435 Number of standard errors used for the check. 436 437 Works only properly when the gamma method was run. 438 """ 439 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 440 441 def is_zero(self, atol=1e-10): 442 """Checks whether the observable is zero within a given tolerance. 443 444 Parameters 445 ---------- 446 atol : float 447 Absolute tolerance (for details see numpy documentation). 448 """ 449 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 450 451 def plot_tauint(self, save=None): 452 """Plot integrated autocorrelation time for each ensemble. 453 454 Parameters 455 ---------- 456 save : str 457 saves the figure to a file named 'save' if. 458 """ 459 if not hasattr(self, 'e_dvalue'): 460 raise Exception('Run the gamma method first.') 461 462 for e, e_name in enumerate(self.mc_names): 463 fig = plt.figure() 464 plt.xlabel(r'$W$') 465 plt.ylabel(r'$\tau_\mathrm{int}$') 466 length = int(len(self.e_n_tauint[e_name])) 467 if self.tau_exp[e_name] > 0: 468 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 469 x_help = np.arange(2 * self.tau_exp[e_name]) 470 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 471 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 472 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 473 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 474 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 475 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 476 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 477 else: 478 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 479 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 480 481 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 482 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 483 plt.legend() 484 plt.xlim(-0.5, xmax) 485 ylim = plt.ylim() 486 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 487 plt.draw() 488 if save: 489 fig.savefig(save + "_" + str(e)) 490 491 def plot_rho(self, save=None): 492 """Plot normalized autocorrelation function time for each ensemble. 493 494 Parameters 495 ---------- 496 save : str 497 saves the figure to a file named 'save' if. 498 """ 499 if not hasattr(self, 'e_dvalue'): 500 raise Exception('Run the gamma method first.') 501 for e, e_name in enumerate(self.mc_names): 502 fig = plt.figure() 503 plt.xlabel('W') 504 plt.ylabel('rho') 505 length = int(len(self.e_drho[e_name])) 506 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 507 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 508 if self.tau_exp[e_name] > 0: 509 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 510 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 511 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 512 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 513 else: 514 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 515 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 516 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 517 plt.xlim(-0.5, xmax) 518 plt.draw() 519 if save: 520 fig.savefig(save + "_" + str(e)) 521 522 def plot_rep_dist(self): 523 """Plot replica distribution for each ensemble with more than one replicum.""" 524 if not hasattr(self, 'e_dvalue'): 525 raise Exception('Run the gamma method first.') 526 for e, e_name in enumerate(self.mc_names): 527 if len(self.e_content[e_name]) == 1: 528 print('No replica distribution for a single replicum (', e_name, ')') 529 continue 530 r_length = [] 531 sub_r_mean = 0 532 for r, r_name in enumerate(self.e_content[e_name]): 533 r_length.append(len(self.deltas[r_name])) 534 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 535 e_N = np.sum(r_length) 536 sub_r_mean /= e_N 537 arr = np.zeros(len(self.e_content[e_name])) 538 for r, r_name in enumerate(self.e_content[e_name]): 539 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 540 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 541 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 542 plt.draw() 543 544 def plot_history(self, expand=True): 545 """Plot derived Monte Carlo history for each ensemble 546 547 Parameters 548 ---------- 549 expand : bool 550 show expanded history for irregular Monte Carlo chains (default: True). 551 """ 552 for e, e_name in enumerate(self.mc_names): 553 plt.figure() 554 r_length = [] 555 tmp = [] 556 tmp_expanded = [] 557 for r, r_name in enumerate(self.e_content[e_name]): 558 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 559 if expand: 560 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 561 r_length.append(len(tmp_expanded[-1])) 562 else: 563 r_length.append(len(tmp[-1])) 564 e_N = np.sum(r_length) 565 x = np.arange(e_N) 566 y_test = np.concatenate(tmp, axis=0) 567 if expand: 568 y = np.concatenate(tmp_expanded, axis=0) 569 else: 570 y = y_test 571 plt.errorbar(x, y, fmt='.', markersize=3) 572 plt.xlim(-0.5, e_N - 0.5) 573 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 574 plt.draw() 575 576 def plot_piechart(self, save=None): 577 """Plot piechart which shows the fractional contribution of each 578 ensemble to the error and returns a dictionary containing the fractions. 579 580 Parameters 581 ---------- 582 save : str 583 saves the figure to a file named 'save' if. 584 """ 585 if not hasattr(self, 'e_dvalue'): 586 raise Exception('Run the gamma method first.') 587 if np.isclose(0.0, self._dvalue, atol=1e-15): 588 raise Exception('Error is 0.0') 589 labels = self.e_names 590 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 591 fig1, ax1 = plt.subplots() 592 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 593 ax1.axis('equal') 594 plt.draw() 595 if save: 596 fig1.savefig(save) 597 598 return dict(zip(self.e_names, sizes)) 599 600 def dump(self, filename, datatype="json.gz", description="", **kwargs): 601 """Dump the Obs to a file 'name' of chosen format. 602 603 Parameters 604 ---------- 605 filename : str 606 name of the file to be saved. 607 datatype : str 608 Format of the exported file. Supported formats include 609 "json.gz" and "pickle" 610 description : str 611 Description for output file, only relevant for json.gz format. 612 path : str 613 specifies a custom path for the file (default '.') 614 """ 615 if 'path' in kwargs: 616 file_name = kwargs.get('path') + '/' + filename 617 else: 618 file_name = filename 619 620 if datatype == "json.gz": 621 from .input.json import dump_to_json 622 dump_to_json([self], file_name, description=description) 623 elif datatype == "pickle": 624 with open(file_name + '.p', 'wb') as fb: 625 pickle.dump(self, fb) 626 else: 627 raise Exception("Unknown datatype " + str(datatype)) 628 629 def export_jackknife(self): 630 """Export jackknife samples from the Obs 631 632 Returns 633 ------- 634 numpy.ndarray 635 Returns a numpy array of length N + 1 where N is the number of samples 636 for the given ensemble and replicum. The zeroth entry of the array contains 637 the mean value of the Obs, entries 1 to N contain the N jackknife samples 638 derived from the Obs. The current implementation only works for observables 639 defined on exactly one ensemble and replicum. The derived jackknife samples 640 should agree with samples from a full jackknife analysis up to O(1/N). 641 """ 642 643 if len(self.names) != 1: 644 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 645 646 name = self.names[0] 647 full_data = self.deltas[name] + self.r_values[name] 648 n = full_data.size 649 mean = self.value 650 tmp_jacks = np.zeros(n + 1) 651 tmp_jacks[0] = mean 652 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 653 return tmp_jacks 654 655 def __float__(self): 656 return float(self.value) 657 658 def __repr__(self): 659 return 'Obs[' + str(self) + ']' 660 661 def __str__(self): 662 if self._dvalue == 0.0: 663 return str(self.value) 664 fexp = np.floor(np.log10(self._dvalue)) 665 if fexp < 0.0: 666 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 667 elif fexp == 0.0: 668 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 669 else: 670 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 671 672 # Overload comparisons 673 def __lt__(self, other): 674 return self.value < other 675 676 def __le__(self, other): 677 return self.value <= other 678 679 def __gt__(self, other): 680 return self.value > other 681 682 def __ge__(self, other): 683 return self.value >= other 684 685 def __eq__(self, other): 686 return (self - other).is_zero() 687 688 def __ne__(self, other): 689 return not (self - other).is_zero() 690 691 # Overload math operations 692 def __add__(self, y): 693 if isinstance(y, Obs): 694 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 695 else: 696 if isinstance(y, np.ndarray): 697 return np.array([self + o for o in y]) 698 elif y.__class__.__name__ in ['Corr', 'CObs']: 699 return NotImplemented 700 else: 701 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 702 703 def __radd__(self, y): 704 return self + y 705 706 def __mul__(self, y): 707 if isinstance(y, Obs): 708 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 709 else: 710 if isinstance(y, np.ndarray): 711 return np.array([self * o for o in y]) 712 elif isinstance(y, complex): 713 return CObs(self * y.real, self * y.imag) 714 elif y.__class__.__name__ in ['Corr', 'CObs']: 715 return NotImplemented 716 else: 717 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 718 719 def __rmul__(self, y): 720 return self * y 721 722 def __sub__(self, y): 723 if isinstance(y, Obs): 724 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 725 else: 726 if isinstance(y, np.ndarray): 727 return np.array([self - o for o in y]) 728 elif y.__class__.__name__ in ['Corr', 'CObs']: 729 return NotImplemented 730 else: 731 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 732 733 def __rsub__(self, y): 734 return -1 * (self - y) 735 736 def __pos__(self): 737 return self 738 739 def __neg__(self): 740 return -1 * self 741 742 def __truediv__(self, y): 743 if isinstance(y, Obs): 744 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 745 else: 746 if isinstance(y, np.ndarray): 747 return np.array([self / o for o in y]) 748 elif y.__class__.__name__ in ['Corr', 'CObs']: 749 return NotImplemented 750 else: 751 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 752 753 def __rtruediv__(self, y): 754 if isinstance(y, Obs): 755 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 756 else: 757 if isinstance(y, np.ndarray): 758 return np.array([o / self for o in y]) 759 elif y.__class__.__name__ in ['Corr', 'CObs']: 760 return NotImplemented 761 else: 762 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 763 764 def __pow__(self, y): 765 if isinstance(y, Obs): 766 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 767 else: 768 return derived_observable(lambda x: x[0] ** y, [self]) 769 770 def __rpow__(self, y): 771 if isinstance(y, Obs): 772 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 773 else: 774 return derived_observable(lambda x: y ** x[0], [self]) 775 776 def __abs__(self): 777 return derived_observable(lambda x: anp.abs(x[0]), [self]) 778 779 # Overload numpy functions 780 def sqrt(self): 781 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 782 783 def log(self): 784 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 785 786 def exp(self): 787 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 788 789 def sin(self): 790 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 791 792 def cos(self): 793 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 794 795 def tan(self): 796 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 797 798 def arcsin(self): 799 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 800 801 def arccos(self): 802 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 803 804 def arctan(self): 805 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 806 807 def sinh(self): 808 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 809 810 def cosh(self): 811 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 812 813 def tanh(self): 814 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 815 816 def arcsinh(self): 817 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 818 819 def arccosh(self): 820 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 821 822 def arctanh(self): 823 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 824 825 826class CObs: 827 """Class for a complex valued observable.""" 828 __slots__ = ['_real', '_imag', 'tag'] 829 830 def __init__(self, real, imag=0.0): 831 self._real = real 832 self._imag = imag 833 self.tag = None 834 835 @property 836 def real(self): 837 return self._real 838 839 @property 840 def imag(self): 841 return self._imag 842 843 def gamma_method(self, **kwargs): 844 """Executes the gamma_method for the real and the imaginary part.""" 845 if isinstance(self.real, Obs): 846 self.real.gamma_method(**kwargs) 847 if isinstance(self.imag, Obs): 848 self.imag.gamma_method(**kwargs) 849 850 def is_zero(self): 851 """Checks whether both real and imaginary part are zero within machine precision.""" 852 return self.real == 0.0 and self.imag == 0.0 853 854 def conjugate(self): 855 return CObs(self.real, -self.imag) 856 857 def __add__(self, other): 858 if isinstance(other, np.ndarray): 859 return other + self 860 elif hasattr(other, 'real') and hasattr(other, 'imag'): 861 return CObs(self.real + other.real, 862 self.imag + other.imag) 863 else: 864 return CObs(self.real + other, self.imag) 865 866 def __radd__(self, y): 867 return self + y 868 869 def __sub__(self, other): 870 if isinstance(other, np.ndarray): 871 return -1 * (other - self) 872 elif hasattr(other, 'real') and hasattr(other, 'imag'): 873 return CObs(self.real - other.real, self.imag - other.imag) 874 else: 875 return CObs(self.real - other, self.imag) 876 877 def __rsub__(self, other): 878 return -1 * (self - other) 879 880 def __mul__(self, other): 881 if isinstance(other, np.ndarray): 882 return other * self 883 elif hasattr(other, 'real') and hasattr(other, 'imag'): 884 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 885 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 886 [self.real, other.real, self.imag, other.imag], 887 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 888 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 889 [self.real, other.real, self.imag, other.imag], 890 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 891 elif getattr(other, 'imag', 0) != 0: 892 return CObs(self.real * other.real - self.imag * other.imag, 893 self.imag * other.real + self.real * other.imag) 894 else: 895 return CObs(self.real * other.real, self.imag * other.real) 896 else: 897 return CObs(self.real * other, self.imag * other) 898 899 def __rmul__(self, other): 900 return self * other 901 902 def __truediv__(self, other): 903 if isinstance(other, np.ndarray): 904 return 1 / (other / self) 905 elif hasattr(other, 'real') and hasattr(other, 'imag'): 906 r = other.real ** 2 + other.imag ** 2 907 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 908 else: 909 return CObs(self.real / other, self.imag / other) 910 911 def __rtruediv__(self, other): 912 r = self.real ** 2 + self.imag ** 2 913 if hasattr(other, 'real') and hasattr(other, 'imag'): 914 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 915 else: 916 return CObs(self.real * other / r, -self.imag * other / r) 917 918 def __abs__(self): 919 return np.sqrt(self.real**2 + self.imag**2) 920 921 def __pos__(self): 922 return self 923 924 def __neg__(self): 925 return -1 * self 926 927 def __eq__(self, other): 928 return self.real == other.real and self.imag == other.imag 929 930 def __str__(self): 931 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 932 933 def __repr__(self): 934 return 'CObs[' + str(self) + ']' 935 936 937def _expand_deltas(deltas, idx, shape): 938 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 939 If idx is of type range, the deltas are not changed 940 941 Parameters 942 ---------- 943 deltas : list 944 List of fluctuations 945 idx : list 946 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 947 shape : int 948 Number of configs in idx. 949 """ 950 if isinstance(idx, range): 951 return deltas 952 else: 953 ret = np.zeros(idx[-1] - idx[0] + 1) 954 for i in range(shape): 955 ret[idx[i] - idx[0]] = deltas[i] 956 return ret 957 958 959def _merge_idx(idl): 960 """Returns the union of all lists in idl as sorted list 961 962 Parameters 963 ---------- 964 idl : list 965 List of lists or ranges. 966 """ 967 968 # Use groupby to efficiently check whether all elements of idl are identical 969 try: 970 g = groupby(idl) 971 if next(g, True) and not next(g, False): 972 return idl[0] 973 except Exception: 974 pass 975 976 if np.all([type(idx) is range for idx in idl]): 977 if len(set([idx[0] for idx in idl])) == 1: 978 idstart = min([idx.start for idx in idl]) 979 idstop = max([idx.stop for idx in idl]) 980 idstep = min([idx.step for idx in idl]) 981 return range(idstart, idstop, idstep) 982 983 return sorted(set().union(*idl)) 984 985 986def _intersection_idx(idl): 987 """Returns the intersection of all lists in idl as sorted list 988 989 Parameters 990 ---------- 991 idl : list 992 List of lists or ranges. 993 """ 994 995 def _lcm(*args): 996 """Returns the lowest common multiple of args. 997 998 From python 3.9 onwards the math library contains an lcm function.""" 999 return reduce(lambda a, b: a * b // gcd(a, b), args) 1000 1001 # Use groupby to efficiently check whether all elements of idl are identical 1002 try: 1003 g = groupby(idl) 1004 if next(g, True) and not next(g, False): 1005 return idl[0] 1006 except Exception: 1007 pass 1008 1009 if np.all([type(idx) is range for idx in idl]): 1010 if len(set([idx[0] for idx in idl])) == 1: 1011 idstart = max([idx.start for idx in idl]) 1012 idstop = min([idx.stop for idx in idl]) 1013 idstep = _lcm(*[idx.step for idx in idl]) 1014 return range(idstart, idstop, idstep) 1015 1016 return sorted(set.intersection(*[set(o) for o in idl])) 1017 1018 1019def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1020 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1021 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1022 common divisor of the step sizes is used as new step size. 1023 1024 Parameters 1025 ---------- 1026 deltas : list 1027 List of fluctuations 1028 idx : list 1029 List or range of configs on which the deltas are defined. 1030 Has to be a subset of new_idx and has to be sorted in ascending order. 1031 shape : list 1032 Number of configs in idx. 1033 new_idx : list 1034 List of configs that defines the new range, has to be sorted in ascending order. 1035 """ 1036 1037 if type(idx) is range and type(new_idx) is range: 1038 if idx == new_idx: 1039 return deltas 1040 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1041 for i in range(shape): 1042 ret[idx[i] - new_idx[0]] = deltas[i] 1043 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1044 1045 1046def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): 1047 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. 1048 If idx and new_idx are of type range, the smallest 1049 common divisor of the step sizes is used as new step size. 1050 1051 Parameters 1052 ---------- 1053 deltas : list 1054 List of fluctuations 1055 idx : list 1056 List or range of configs on which the deltas are defined. 1057 Has to be a subset of new_idx and has to be sorted in ascending order. 1058 shape : list 1059 Number of configs in idx. 1060 new_idx : list 1061 List of configs that defines the new range, has to be sorted in ascending order. 1062 """ 1063 1064 if type(idx) is range and type(new_idx) is range: 1065 if idx == new_idx: 1066 return deltas 1067 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1068 for i in range(shape): 1069 if idx[i] in new_idx: 1070 ret[idx[i] - new_idx[0]] = deltas[i] 1071 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1072 1073 1074def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1075 """Filter out all configurations with vanishing fluctuation such that they do not 1076 contribute to the error estimate anymore. Returns the new deltas and 1077 idx according to the filtering. 1078 A fluctuation is considered to be vanishing, if it is smaller than eps times 1079 the mean of the absolute values of all deltas in one list. 1080 1081 Parameters 1082 ---------- 1083 deltas : list 1084 List of fluctuations 1085 idx : list 1086 List or ranges of configs on which the deltas are defined. 1087 eps : float 1088 Prefactor that enters the filter criterion. 1089 """ 1090 new_deltas = [] 1091 new_idx = [] 1092 maxd = np.mean(np.fabs(deltas)) 1093 for i in range(len(deltas)): 1094 if abs(deltas[i]) > eps * maxd: 1095 new_deltas.append(deltas[i]) 1096 new_idx.append(idx[i]) 1097 if new_idx: 1098 return np.array(new_deltas), new_idx 1099 else: 1100 return deltas, idx 1101 1102 1103def derived_observable(func, data, array_mode=False, **kwargs): 1104 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1105 1106 Parameters 1107 ---------- 1108 func : object 1109 arbitrary function of the form func(data, **kwargs). For the 1110 automatic differentiation to work, all numpy functions have to have 1111 the autograd wrapper (use 'import autograd.numpy as anp'). 1112 data : list 1113 list of Obs, e.g. [obs1, obs2, obs3]. 1114 num_grad : bool 1115 if True, numerical derivatives are used instead of autograd 1116 (default False). To control the numerical differentiation the 1117 kwargs of numdifftools.step_generators.MaxStepGenerator 1118 can be used. 1119 man_grad : list 1120 manually supply a list or an array which contains the jacobian 1121 of func. Use cautiously, supplying the wrong derivative will 1122 not be intercepted. 1123 1124 Notes 1125 ----- 1126 For simple mathematical operations it can be practical to use anonymous 1127 functions. For the ratio of two observables one can e.g. use 1128 1129 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1130 """ 1131 1132 data = np.asarray(data) 1133 raveled_data = data.ravel() 1134 1135 # Workaround for matrix operations containing non Obs data 1136 if not all(isinstance(x, Obs) for x in raveled_data): 1137 for i in range(len(raveled_data)): 1138 if isinstance(raveled_data[i], (int, float)): 1139 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1140 1141 allcov = {} 1142 for o in raveled_data: 1143 for name in o.cov_names: 1144 if name in allcov: 1145 if not np.allclose(allcov[name], o.covobs[name].cov): 1146 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1147 else: 1148 allcov[name] = o.covobs[name].cov 1149 1150 n_obs = len(raveled_data) 1151 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1152 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1153 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1154 1155 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1156 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1157 1158 if data.ndim == 1: 1159 values = np.array([o.value for o in data]) 1160 else: 1161 values = np.vectorize(lambda x: x.value)(data) 1162 1163 new_values = func(values, **kwargs) 1164 1165 multi = int(isinstance(new_values, np.ndarray)) 1166 1167 new_r_values = {} 1168 new_idl_d = {} 1169 for name in new_sample_names: 1170 idl = [] 1171 tmp_values = np.zeros(n_obs) 1172 for i, item in enumerate(raveled_data): 1173 tmp_values[i] = item.r_values.get(name, item.value) 1174 tmp_idl = item.idl.get(name) 1175 if tmp_idl is not None: 1176 idl.append(tmp_idl) 1177 if multi > 0: 1178 tmp_values = np.array(tmp_values).reshape(data.shape) 1179 new_r_values[name] = func(tmp_values, **kwargs) 1180 new_idl_d[name] = _merge_idx(idl) 1181 if not is_merged[name]: 1182 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1183 1184 if 'man_grad' in kwargs: 1185 deriv = np.asarray(kwargs.get('man_grad')) 1186 if new_values.shape + data.shape != deriv.shape: 1187 raise Exception('Manual derivative does not have correct shape.') 1188 elif kwargs.get('num_grad') is True: 1189 if multi > 0: 1190 raise Exception('Multi mode currently not supported for numerical derivative') 1191 options = { 1192 'base_step': 0.1, 1193 'step_ratio': 2.5} 1194 for key in options.keys(): 1195 kwarg = kwargs.get(key) 1196 if kwarg is not None: 1197 options[key] = kwarg 1198 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1199 if tmp_df.size == 1: 1200 deriv = np.array([tmp_df.real]) 1201 else: 1202 deriv = tmp_df.real 1203 else: 1204 deriv = jacobian(func)(values, **kwargs) 1205 1206 final_result = np.zeros(new_values.shape, dtype=object) 1207 1208 if array_mode is True: 1209 1210 class _Zero_grad(): 1211 def __init__(self, N): 1212 self.grad = np.zeros((N, 1)) 1213 1214 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1215 d_extracted = {} 1216 g_extracted = {} 1217 for name in new_sample_names: 1218 d_extracted[name] = [] 1219 ens_length = len(new_idl_d[name]) 1220 for i_dat, dat in enumerate(data): 1221 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1222 for name in new_cov_names: 1223 g_extracted[name] = [] 1224 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1225 for i_dat, dat in enumerate(data): 1226 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1227 1228 for i_val, new_val in np.ndenumerate(new_values): 1229 new_deltas = {} 1230 new_grad = {} 1231 if array_mode is True: 1232 for name in new_sample_names: 1233 ens_length = d_extracted[name][0].shape[-1] 1234 new_deltas[name] = np.zeros(ens_length) 1235 for i_dat, dat in enumerate(d_extracted[name]): 1236 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1237 for name in new_cov_names: 1238 new_grad[name] = 0 1239 for i_dat, dat in enumerate(g_extracted[name]): 1240 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1241 else: 1242 for j_obs, obs in np.ndenumerate(data): 1243 for name in obs.names: 1244 if name in obs.cov_names: 1245 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1246 else: 1247 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1248 1249 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1250 1251 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1252 raise Exception('The same name has been used for deltas and covobs!') 1253 new_samples = [] 1254 new_means = [] 1255 new_idl = [] 1256 new_names_obs = [] 1257 for name in new_names: 1258 if name not in new_covobs: 1259 if is_merged[name]: 1260 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1261 else: 1262 filtered_deltas = new_deltas[name] 1263 filtered_idl_d = new_idl_d[name] 1264 1265 new_samples.append(filtered_deltas) 1266 new_idl.append(filtered_idl_d) 1267 new_means.append(new_r_values[name][i_val]) 1268 new_names_obs.append(name) 1269 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1270 for name in new_covobs: 1271 final_result[i_val].names.append(name) 1272 final_result[i_val]._covobs = new_covobs 1273 final_result[i_val]._value = new_val 1274 final_result[i_val].is_merged = is_merged 1275 final_result[i_val].reweighted = reweighted 1276 1277 if multi == 0: 1278 final_result = final_result.item() 1279 1280 return final_result 1281 1282 1283def _reduce_deltas(deltas, idx_old, idx_new): 1284 """Extract deltas defined on idx_old on all configs of idx_new. 1285 1286 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1287 are ordered in an ascending order. 1288 1289 Parameters 1290 ---------- 1291 deltas : list 1292 List of fluctuations 1293 idx_old : list 1294 List or range of configs on which the deltas are defined 1295 idx_new : list 1296 List of configs for which we want to extract the deltas. 1297 Has to be a subset of idx_old. 1298 """ 1299 if not len(deltas) == len(idx_old): 1300 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1301 if type(idx_old) is range and type(idx_new) is range: 1302 if idx_old == idx_new: 1303 return deltas 1304 shape = len(idx_new) 1305 ret = np.zeros(shape) 1306 oldpos = 0 1307 for i in range(shape): 1308 pos = -1 1309 for j in range(oldpos, len(idx_old)): 1310 if idx_old[j] == idx_new[i]: 1311 pos = j 1312 break 1313 if pos < 0: 1314 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1315 ret[i] = deltas[pos] 1316 oldpos = pos 1317 return np.array(ret) 1318 1319 1320def reweight(weight, obs, **kwargs): 1321 """Reweight a list of observables. 1322 1323 Parameters 1324 ---------- 1325 weight : Obs 1326 Reweighting factor. An Observable that has to be defined on a superset of the 1327 configurations in obs[i].idl for all i. 1328 obs : list 1329 list of Obs, e.g. [obs1, obs2, obs3]. 1330 all_configs : bool 1331 if True, the reweighted observables are normalized by the average of 1332 the reweighting factor on all configurations in weight.idl and not 1333 on the configurations in obs[i].idl. 1334 """ 1335 result = [] 1336 for i in range(len(obs)): 1337 if len(obs[i].cov_names): 1338 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1339 if not set(obs[i].names).issubset(weight.names): 1340 raise Exception('Error: Ensembles do not fit') 1341 for name in obs[i].names: 1342 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1343 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1344 new_samples = [] 1345 w_deltas = {} 1346 for name in sorted(obs[i].names): 1347 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1348 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1349 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1350 1351 if kwargs.get('all_configs'): 1352 new_weight = weight 1353 else: 1354 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1355 1356 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1357 result[-1].reweighted = True 1358 result[-1].is_merged = obs[i].is_merged 1359 1360 return result 1361 1362 1363def correlate(obs_a, obs_b): 1364 """Correlate two observables. 1365 1366 Parameters 1367 ---------- 1368 obs_a : Obs 1369 First observable 1370 obs_b : Obs 1371 Second observable 1372 1373 Notes 1374 ----- 1375 Keep in mind to only correlate primary observables which have not been reweighted 1376 yet. The reweighting has to be applied after correlating the observables. 1377 Currently only works if ensembles are identical (this is not strictly necessary). 1378 """ 1379 1380 if sorted(obs_a.names) != sorted(obs_b.names): 1381 raise Exception('Ensembles do not fit') 1382 if len(obs_a.cov_names) or len(obs_b.cov_names): 1383 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1384 for name in obs_a.names: 1385 if obs_a.shape[name] != obs_b.shape[name]: 1386 raise Exception('Shapes of ensemble', name, 'do not fit') 1387 if obs_a.idl[name] != obs_b.idl[name]: 1388 raise Exception('idl of ensemble', name, 'do not fit') 1389 1390 if obs_a.reweighted is True: 1391 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1392 if obs_b.reweighted is True: 1393 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1394 1395 new_samples = [] 1396 new_idl = [] 1397 for name in sorted(obs_a.names): 1398 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1399 new_idl.append(obs_a.idl[name]) 1400 1401 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1402 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1403 o.reweighted = obs_a.reweighted or obs_b.reweighted 1404 return o 1405 1406 1407def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1408 r'''Calculates the error covariance matrix of a set of observables. 1409 1410 The gamma method has to be applied first to all observables. 1411 1412 Parameters 1413 ---------- 1414 obs : list or numpy.ndarray 1415 List or one dimensional array of Obs 1416 visualize : bool 1417 If True plots the corresponding normalized correlation matrix (default False). 1418 correlation : bool 1419 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1420 smooth : None or int 1421 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1422 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1423 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1424 small ones. 1425 1426 Notes 1427 ----- 1428 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1429 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1430 in the absence of autocorrelation. 1431 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1432 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1433 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1434 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1435 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1436 ''' 1437 1438 length = len(obs) 1439 1440 max_samples = np.max([o.N for o in obs]) 1441 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1442 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1443 1444 cov = np.zeros((length, length)) 1445 for i in range(length): 1446 for j in range(i, length): 1447 cov[i, j] = _covariance_element(obs[i], obs[j]) 1448 cov = cov + cov.T - np.diag(np.diag(cov)) 1449 1450 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1451 1452 if isinstance(smooth, int): 1453 corr = _smooth_eigenvalues(corr, smooth) 1454 1455 if visualize: 1456 plt.matshow(corr, vmin=-1, vmax=1) 1457 plt.set_cmap('RdBu') 1458 plt.colorbar() 1459 plt.draw() 1460 1461 if correlation is True: 1462 return corr 1463 1464 errors = [o.dvalue for o in obs] 1465 cov = np.diag(errors) @ corr @ np.diag(errors) 1466 1467 eigenvalues = np.linalg.eigh(cov)[0] 1468 if not np.all(eigenvalues >= 0): 1469 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1470 1471 return cov 1472 1473 1474def _smooth_eigenvalues(corr, E): 1475 """Eigenvalue smoothing as described in hep-lat/9412087 1476 1477 corr : np.ndarray 1478 correlation matrix 1479 E : integer 1480 Number of eigenvalues to be left substantially unchanged 1481 """ 1482 if not (2 < E < corr.shape[0] - 1): 1483 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1484 vals, vec = np.linalg.eigh(corr) 1485 lambda_min = np.mean(vals[:-E]) 1486 vals[vals < lambda_min] = lambda_min 1487 vals /= np.mean(vals) 1488 return vec @ np.diag(vals) @ vec.T 1489 1490 1491def _covariance_element(obs1, obs2): 1492 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1493 1494 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1495 deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1496 deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1497 return np.sum(deltas1 * deltas2) 1498 1499 if set(obs1.names).isdisjoint(set(obs2.names)): 1500 return 0.0 1501 1502 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1503 raise Exception('The gamma method has to be applied to both Obs first.') 1504 1505 dvalue = 0.0 1506 1507 for e_name in obs1.mc_names: 1508 1509 if e_name not in obs2.mc_names: 1510 continue 1511 1512 idl_d = {} 1513 for r_name in obs1.e_content[e_name]: 1514 if r_name not in obs2.e_content[e_name]: 1515 continue 1516 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1517 1518 gamma = 0.0 1519 1520 for r_name in obs1.e_content[e_name]: 1521 if r_name not in obs2.e_content[e_name]: 1522 continue 1523 if len(idl_d[r_name]) == 0: 1524 continue 1525 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1526 1527 if gamma == 0.0: 1528 continue 1529 1530 gamma_div = 0.0 1531 for r_name in obs1.e_content[e_name]: 1532 if r_name not in obs2.e_content[e_name]: 1533 continue 1534 if len(idl_d[r_name]) == 0: 1535 continue 1536 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1537 gamma /= gamma_div 1538 1539 dvalue += gamma 1540 1541 for e_name in obs1.cov_names: 1542 1543 if e_name not in obs2.cov_names: 1544 continue 1545 1546 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1547 1548 return dvalue 1549 1550 1551def import_jackknife(jacks, name, idl=None): 1552 """Imports jackknife samples and returns an Obs 1553 1554 Parameters 1555 ---------- 1556 jacks : numpy.ndarray 1557 numpy array containing the mean value as zeroth entry and 1558 the N jackknife samples as first to Nth entry. 1559 name : str 1560 name of the ensemble the samples are defined on. 1561 """ 1562 length = len(jacks) - 1 1563 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1564 samples = jacks[1:] @ prj 1565 mean = np.mean(samples) 1566 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1567 new_obs._value = jacks[0] 1568 return new_obs 1569 1570 1571def merge_obs(list_of_obs): 1572 """Combine all observables in list_of_obs into one new observable 1573 1574 Parameters 1575 ---------- 1576 list_of_obs : list 1577 list of the Obs object to be combined 1578 1579 Notes 1580 ----- 1581 It is not possible to combine obs which are based on the same replicum 1582 """ 1583 replist = [item for obs in list_of_obs for item in obs.names] 1584 if (len(replist) == len(set(replist))) is False: 1585 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1586 if any([len(o.cov_names) for o in list_of_obs]): 1587 raise Exception('Not possible to merge data that contains covobs!') 1588 new_dict = {} 1589 idl_dict = {} 1590 for o in list_of_obs: 1591 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1592 for key in set(o.deltas) | set(o.r_values)}) 1593 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1594 1595 names = sorted(new_dict.keys()) 1596 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1597 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1598 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1599 return o 1600 1601 1602def cov_Obs(means, cov, name, grad=None): 1603 """Create an Obs based on mean(s) and a covariance matrix 1604 1605 Parameters 1606 ---------- 1607 mean : list of floats or float 1608 N mean value(s) of the new Obs 1609 cov : list or array 1610 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1611 name : str 1612 identifier for the covariance matrix 1613 grad : list or array 1614 Gradient of the Covobs wrt. the means belonging to cov. 1615 """ 1616 1617 def covobs_to_obs(co): 1618 """Make an Obs out of a Covobs 1619 1620 Parameters 1621 ---------- 1622 co : Covobs 1623 Covobs to be embedded into the Obs 1624 """ 1625 o = Obs([], [], means=[]) 1626 o._value = co.value 1627 o.names.append(co.name) 1628 o._covobs[co.name] = co 1629 o._dvalue = np.sqrt(co.errsq()) 1630 return o 1631 1632 ol = [] 1633 if isinstance(means, (float, int)): 1634 means = [means] 1635 1636 for i in range(len(means)): 1637 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1638 if ol[0].covobs[name].N != len(means): 1639 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1640 if len(ol) == 1: 1641 return ol[0] 1642 return ol
16class Obs: 17 """Class for a general observable. 18 19 Instances of Obs are the basic objects of a pyerrors error analysis. 20 They are initialized with a list which contains arrays of samples for 21 different ensembles/replica and another list of same length which contains 22 the names of the ensembles/replica. Mathematical operations can be 23 performed on instances. The result is another instance of Obs. The error of 24 an instance can be computed with the gamma_method. Also contains additional 25 methods for output and visualization of the error calculation. 26 27 Attributes 28 ---------- 29 S_global : float 30 Standard value for S (default 2.0) 31 S_dict : dict 32 Dictionary for S values. If an entry for a given ensemble 33 exists this overwrites the standard value for that ensemble. 34 tau_exp_global : float 35 Standard value for tau_exp (default 0.0) 36 tau_exp_dict : dict 37 Dictionary for tau_exp values. If an entry for a given ensemble exists 38 this overwrites the standard value for that ensemble. 39 N_sigma_global : float 40 Standard value for N_sigma (default 1.0) 41 N_sigma_dict : dict 42 Dictionary for N_sigma values. If an entry for a given ensemble exists 43 this overwrites the standard value for that ensemble. 44 """ 45 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 46 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 47 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 48 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 49 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 50 51 S_global = 2.0 52 S_dict = {} 53 tau_exp_global = 0.0 54 tau_exp_dict = {} 55 N_sigma_global = 1.0 56 N_sigma_dict = {} 57 filter_eps = 1e-10 58 59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None 140 141 @property 142 def value(self): 143 return self._value 144 145 @property 146 def dvalue(self): 147 return self._dvalue 148 149 @property 150 def e_names(self): 151 return sorted(set([o.split('|')[0] for o in self.names])) 152 153 @property 154 def cov_names(self): 155 return sorted(set([o for o in self.covobs.keys()])) 156 157 @property 158 def mc_names(self): 159 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 160 161 @property 162 def e_content(self): 163 res = {} 164 for e, e_name in enumerate(self.e_names): 165 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 166 if e_name in self.names: 167 res[e_name].append(e_name) 168 return res 169 170 @property 171 def covobs(self): 172 return self._covobs 173 174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 271 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 272 # Make sure no entry of tauint is smaller than 0.5 273 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 274 # hep-lat/0306017 eq. (42) 275 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 276 self.e_n_dtauint[e_name][0] = 0.0 277 278 def _compute_drho(i): 279 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 280 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 281 282 _compute_drho(1) 283 if self.tau_exp[e_name] > 0: 284 texp = self.tau_exp[e_name] 285 # Critical slowing down analysis 286 if w_max // 2 <= 1: 287 raise Exception("Need at least 8 samples for tau_exp error analysis") 288 for n in range(1, w_max // 2): 289 _compute_drho(n + 1) 290 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 291 # Bias correction hep-lat/0306017 eq. (49) included 292 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 293 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 294 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 295 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 296 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 297 self.e_windowsize[e_name] = n 298 break 299 else: 300 if self.S[e_name] == 0.0: 301 self.e_tauint[e_name] = 0.5 302 self.e_dtauint[e_name] = 0.0 303 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 305 self.e_windowsize[e_name] = 0 306 else: 307 # Standard automatic windowing procedure 308 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 309 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 310 for n in range(1, w_max): 311 if n < w_max // 2 - 2: 312 _compute_drho(n + 1) 313 if g_w[n - 1] < 0 or n >= w_max - 1: 314 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 315 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 316 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 318 self.e_windowsize[e_name] = n 319 break 320 321 self._dvalue += self.e_dvalue[e_name] ** 2 322 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 323 324 for e_name in self.cov_names: 325 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 326 self.e_ddvalue[e_name] = 0 327 self._dvalue += self.e_dvalue[e_name]**2 328 329 self._dvalue = np.sqrt(self._dvalue) 330 if self._dvalue == 0.0: 331 self.ddvalue = 0.0 332 else: 333 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 334 return 335 336 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 337 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 338 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 339 340 Parameters 341 ---------- 342 deltas : list 343 List of fluctuations 344 idx : list 345 List or range of configurations on which the deltas are defined. 346 shape : int 347 Number of configurations in idx. 348 w_max : int 349 Upper bound for the summation window. 350 fft : bool 351 determines whether the fft algorithm is used for the computation 352 of the autocorrelation function. 353 """ 354 gamma = np.zeros(w_max) 355 deltas = _expand_deltas(deltas, idx, shape) 356 new_shape = len(deltas) 357 if fft: 358 max_gamma = min(new_shape, w_max) 359 # The padding for the fft has to be even 360 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 361 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 362 else: 363 for n in range(w_max): 364 if new_shape - n >= 0: 365 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 366 367 return gamma 368 369 def details(self, ens_content=True): 370 """Output detailed properties of the Obs. 371 372 Parameters 373 ---------- 374 ens_content : bool 375 print details about the ensembles and replica if true. 376 """ 377 if self.tag is not None: 378 print("Description:", self.tag) 379 if not hasattr(self, 'e_dvalue'): 380 print('Result\t %3.8e' % (self.value)) 381 else: 382 if self.value == 0.0: 383 percentage = np.nan 384 else: 385 percentage = np.abs(self._dvalue / self.value) * 100 386 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 387 if len(self.e_names) > 1: 388 print(' Ensemble errors:') 389 for e_name in self.mc_names: 390 if len(self.e_names) > 1: 391 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 392 if self.tau_exp[e_name] > 0: 393 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 394 else: 395 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 396 for e_name in self.cov_names: 397 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 398 if ens_content is True: 399 if len(self.e_names) == 1: 400 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 401 else: 402 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 403 my_string_list = [] 404 for key, value in sorted(self.e_content.items()): 405 if key not in self.covobs: 406 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 407 if len(value) == 1: 408 my_string += f': {self.shape[value[0]]} configurations' 409 if isinstance(self.idl[value[0]], range): 410 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 411 else: 412 my_string += ' (irregular range)' 413 else: 414 sublist = [] 415 for v in value: 416 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 417 my_substring += f': {self.shape[v]} configurations' 418 if isinstance(self.idl[v], range): 419 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 420 else: 421 my_substring += ' (irregular range)' 422 sublist.append(my_substring) 423 424 my_string += '\n' + '\n'.join(sublist) 425 else: 426 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 427 my_string_list.append(my_string) 428 print('\n'.join(my_string_list)) 429 430 def is_zero_within_error(self, sigma=1): 431 """Checks whether the observable is zero within 'sigma' standard errors. 432 433 Parameters 434 ---------- 435 sigma : int 436 Number of standard errors used for the check. 437 438 Works only properly when the gamma method was run. 439 """ 440 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 441 442 def is_zero(self, atol=1e-10): 443 """Checks whether the observable is zero within a given tolerance. 444 445 Parameters 446 ---------- 447 atol : float 448 Absolute tolerance (for details see numpy documentation). 449 """ 450 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 451 452 def plot_tauint(self, save=None): 453 """Plot integrated autocorrelation time for each ensemble. 454 455 Parameters 456 ---------- 457 save : str 458 saves the figure to a file named 'save' if. 459 """ 460 if not hasattr(self, 'e_dvalue'): 461 raise Exception('Run the gamma method first.') 462 463 for e, e_name in enumerate(self.mc_names): 464 fig = plt.figure() 465 plt.xlabel(r'$W$') 466 plt.ylabel(r'$\tau_\mathrm{int}$') 467 length = int(len(self.e_n_tauint[e_name])) 468 if self.tau_exp[e_name] > 0: 469 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 470 x_help = np.arange(2 * self.tau_exp[e_name]) 471 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 472 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 473 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 474 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 475 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 476 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 477 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 478 else: 479 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 480 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 481 482 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 483 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 484 plt.legend() 485 plt.xlim(-0.5, xmax) 486 ylim = plt.ylim() 487 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 488 plt.draw() 489 if save: 490 fig.savefig(save + "_" + str(e)) 491 492 def plot_rho(self, save=None): 493 """Plot normalized autocorrelation function time for each ensemble. 494 495 Parameters 496 ---------- 497 save : str 498 saves the figure to a file named 'save' if. 499 """ 500 if not hasattr(self, 'e_dvalue'): 501 raise Exception('Run the gamma method first.') 502 for e, e_name in enumerate(self.mc_names): 503 fig = plt.figure() 504 plt.xlabel('W') 505 plt.ylabel('rho') 506 length = int(len(self.e_drho[e_name])) 507 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 508 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 509 if self.tau_exp[e_name] > 0: 510 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 511 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 512 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 513 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 514 else: 515 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 516 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 517 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 518 plt.xlim(-0.5, xmax) 519 plt.draw() 520 if save: 521 fig.savefig(save + "_" + str(e)) 522 523 def plot_rep_dist(self): 524 """Plot replica distribution for each ensemble with more than one replicum.""" 525 if not hasattr(self, 'e_dvalue'): 526 raise Exception('Run the gamma method first.') 527 for e, e_name in enumerate(self.mc_names): 528 if len(self.e_content[e_name]) == 1: 529 print('No replica distribution for a single replicum (', e_name, ')') 530 continue 531 r_length = [] 532 sub_r_mean = 0 533 for r, r_name in enumerate(self.e_content[e_name]): 534 r_length.append(len(self.deltas[r_name])) 535 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 536 e_N = np.sum(r_length) 537 sub_r_mean /= e_N 538 arr = np.zeros(len(self.e_content[e_name])) 539 for r, r_name in enumerate(self.e_content[e_name]): 540 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 541 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 542 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 543 plt.draw() 544 545 def plot_history(self, expand=True): 546 """Plot derived Monte Carlo history for each ensemble 547 548 Parameters 549 ---------- 550 expand : bool 551 show expanded history for irregular Monte Carlo chains (default: True). 552 """ 553 for e, e_name in enumerate(self.mc_names): 554 plt.figure() 555 r_length = [] 556 tmp = [] 557 tmp_expanded = [] 558 for r, r_name in enumerate(self.e_content[e_name]): 559 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 560 if expand: 561 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 562 r_length.append(len(tmp_expanded[-1])) 563 else: 564 r_length.append(len(tmp[-1])) 565 e_N = np.sum(r_length) 566 x = np.arange(e_N) 567 y_test = np.concatenate(tmp, axis=0) 568 if expand: 569 y = np.concatenate(tmp_expanded, axis=0) 570 else: 571 y = y_test 572 plt.errorbar(x, y, fmt='.', markersize=3) 573 plt.xlim(-0.5, e_N - 0.5) 574 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 575 plt.draw() 576 577 def plot_piechart(self, save=None): 578 """Plot piechart which shows the fractional contribution of each 579 ensemble to the error and returns a dictionary containing the fractions. 580 581 Parameters 582 ---------- 583 save : str 584 saves the figure to a file named 'save' if. 585 """ 586 if not hasattr(self, 'e_dvalue'): 587 raise Exception('Run the gamma method first.') 588 if np.isclose(0.0, self._dvalue, atol=1e-15): 589 raise Exception('Error is 0.0') 590 labels = self.e_names 591 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 592 fig1, ax1 = plt.subplots() 593 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 594 ax1.axis('equal') 595 plt.draw() 596 if save: 597 fig1.savefig(save) 598 599 return dict(zip(self.e_names, sizes)) 600 601 def dump(self, filename, datatype="json.gz", description="", **kwargs): 602 """Dump the Obs to a file 'name' of chosen format. 603 604 Parameters 605 ---------- 606 filename : str 607 name of the file to be saved. 608 datatype : str 609 Format of the exported file. Supported formats include 610 "json.gz" and "pickle" 611 description : str 612 Description for output file, only relevant for json.gz format. 613 path : str 614 specifies a custom path for the file (default '.') 615 """ 616 if 'path' in kwargs: 617 file_name = kwargs.get('path') + '/' + filename 618 else: 619 file_name = filename 620 621 if datatype == "json.gz": 622 from .input.json import dump_to_json 623 dump_to_json([self], file_name, description=description) 624 elif datatype == "pickle": 625 with open(file_name + '.p', 'wb') as fb: 626 pickle.dump(self, fb) 627 else: 628 raise Exception("Unknown datatype " + str(datatype)) 629 630 def export_jackknife(self): 631 """Export jackknife samples from the Obs 632 633 Returns 634 ------- 635 numpy.ndarray 636 Returns a numpy array of length N + 1 where N is the number of samples 637 for the given ensemble and replicum. The zeroth entry of the array contains 638 the mean value of the Obs, entries 1 to N contain the N jackknife samples 639 derived from the Obs. The current implementation only works for observables 640 defined on exactly one ensemble and replicum. The derived jackknife samples 641 should agree with samples from a full jackknife analysis up to O(1/N). 642 """ 643 644 if len(self.names) != 1: 645 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 646 647 name = self.names[0] 648 full_data = self.deltas[name] + self.r_values[name] 649 n = full_data.size 650 mean = self.value 651 tmp_jacks = np.zeros(n + 1) 652 tmp_jacks[0] = mean 653 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 654 return tmp_jacks 655 656 def __float__(self): 657 return float(self.value) 658 659 def __repr__(self): 660 return 'Obs[' + str(self) + ']' 661 662 def __str__(self): 663 if self._dvalue == 0.0: 664 return str(self.value) 665 fexp = np.floor(np.log10(self._dvalue)) 666 if fexp < 0.0: 667 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 668 elif fexp == 0.0: 669 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 670 else: 671 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 672 673 # Overload comparisons 674 def __lt__(self, other): 675 return self.value < other 676 677 def __le__(self, other): 678 return self.value <= other 679 680 def __gt__(self, other): 681 return self.value > other 682 683 def __ge__(self, other): 684 return self.value >= other 685 686 def __eq__(self, other): 687 return (self - other).is_zero() 688 689 def __ne__(self, other): 690 return not (self - other).is_zero() 691 692 # Overload math operations 693 def __add__(self, y): 694 if isinstance(y, Obs): 695 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 696 else: 697 if isinstance(y, np.ndarray): 698 return np.array([self + o for o in y]) 699 elif y.__class__.__name__ in ['Corr', 'CObs']: 700 return NotImplemented 701 else: 702 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 703 704 def __radd__(self, y): 705 return self + y 706 707 def __mul__(self, y): 708 if isinstance(y, Obs): 709 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 710 else: 711 if isinstance(y, np.ndarray): 712 return np.array([self * o for o in y]) 713 elif isinstance(y, complex): 714 return CObs(self * y.real, self * y.imag) 715 elif y.__class__.__name__ in ['Corr', 'CObs']: 716 return NotImplemented 717 else: 718 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 719 720 def __rmul__(self, y): 721 return self * y 722 723 def __sub__(self, y): 724 if isinstance(y, Obs): 725 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 726 else: 727 if isinstance(y, np.ndarray): 728 return np.array([self - o for o in y]) 729 elif y.__class__.__name__ in ['Corr', 'CObs']: 730 return NotImplemented 731 else: 732 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 733 734 def __rsub__(self, y): 735 return -1 * (self - y) 736 737 def __pos__(self): 738 return self 739 740 def __neg__(self): 741 return -1 * self 742 743 def __truediv__(self, y): 744 if isinstance(y, Obs): 745 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 746 else: 747 if isinstance(y, np.ndarray): 748 return np.array([self / o for o in y]) 749 elif y.__class__.__name__ in ['Corr', 'CObs']: 750 return NotImplemented 751 else: 752 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 753 754 def __rtruediv__(self, y): 755 if isinstance(y, Obs): 756 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 757 else: 758 if isinstance(y, np.ndarray): 759 return np.array([o / self for o in y]) 760 elif y.__class__.__name__ in ['Corr', 'CObs']: 761 return NotImplemented 762 else: 763 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 764 765 def __pow__(self, y): 766 if isinstance(y, Obs): 767 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 768 else: 769 return derived_observable(lambda x: x[0] ** y, [self]) 770 771 def __rpow__(self, y): 772 if isinstance(y, Obs): 773 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 774 else: 775 return derived_observable(lambda x: y ** x[0], [self]) 776 777 def __abs__(self): 778 return derived_observable(lambda x: anp.abs(x[0]), [self]) 779 780 # Overload numpy functions 781 def sqrt(self): 782 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 783 784 def log(self): 785 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 786 787 def exp(self): 788 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 789 790 def sin(self): 791 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 792 793 def cos(self): 794 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 795 796 def tan(self): 797 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 798 799 def arcsin(self): 800 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 801 802 def arccos(self): 803 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 804 805 def arctan(self): 806 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 807 808 def sinh(self): 809 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 810 811 def cosh(self): 812 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 813 814 def tanh(self): 815 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 816 817 def arcsinh(self): 818 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 819 820 def arccosh(self): 821 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 822 823 def arctanh(self): 824 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 271 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 272 # Make sure no entry of tauint is smaller than 0.5 273 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 274 # hep-lat/0306017 eq. (42) 275 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 276 self.e_n_dtauint[e_name][0] = 0.0 277 278 def _compute_drho(i): 279 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 280 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 281 282 _compute_drho(1) 283 if self.tau_exp[e_name] > 0: 284 texp = self.tau_exp[e_name] 285 # Critical slowing down analysis 286 if w_max // 2 <= 1: 287 raise Exception("Need at least 8 samples for tau_exp error analysis") 288 for n in range(1, w_max // 2): 289 _compute_drho(n + 1) 290 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 291 # Bias correction hep-lat/0306017 eq. (49) included 292 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 293 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 294 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 295 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 296 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 297 self.e_windowsize[e_name] = n 298 break 299 else: 300 if self.S[e_name] == 0.0: 301 self.e_tauint[e_name] = 0.5 302 self.e_dtauint[e_name] = 0.0 303 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 305 self.e_windowsize[e_name] = 0 306 else: 307 # Standard automatic windowing procedure 308 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 309 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 310 for n in range(1, w_max): 311 if n < w_max // 2 - 2: 312 _compute_drho(n + 1) 313 if g_w[n - 1] < 0 or n >= w_max - 1: 314 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 315 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 316 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 318 self.e_windowsize[e_name] = n 319 break 320 321 self._dvalue += self.e_dvalue[e_name] ** 2 322 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 323 324 for e_name in self.cov_names: 325 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 326 self.e_ddvalue[e_name] = 0 327 self._dvalue += self.e_dvalue[e_name]**2 328 329 self._dvalue = np.sqrt(self._dvalue) 330 if self._dvalue == 0.0: 331 self.ddvalue = 0.0 332 else: 333 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 334 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
369 def details(self, ens_content=True): 370 """Output detailed properties of the Obs. 371 372 Parameters 373 ---------- 374 ens_content : bool 375 print details about the ensembles and replica if true. 376 """ 377 if self.tag is not None: 378 print("Description:", self.tag) 379 if not hasattr(self, 'e_dvalue'): 380 print('Result\t %3.8e' % (self.value)) 381 else: 382 if self.value == 0.0: 383 percentage = np.nan 384 else: 385 percentage = np.abs(self._dvalue / self.value) * 100 386 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 387 if len(self.e_names) > 1: 388 print(' Ensemble errors:') 389 for e_name in self.mc_names: 390 if len(self.e_names) > 1: 391 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 392 if self.tau_exp[e_name] > 0: 393 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 394 else: 395 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 396 for e_name in self.cov_names: 397 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 398 if ens_content is True: 399 if len(self.e_names) == 1: 400 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 401 else: 402 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 403 my_string_list = [] 404 for key, value in sorted(self.e_content.items()): 405 if key not in self.covobs: 406 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 407 if len(value) == 1: 408 my_string += f': {self.shape[value[0]]} configurations' 409 if isinstance(self.idl[value[0]], range): 410 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 411 else: 412 my_string += ' (irregular range)' 413 else: 414 sublist = [] 415 for v in value: 416 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 417 my_substring += f': {self.shape[v]} configurations' 418 if isinstance(self.idl[v], range): 419 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 420 else: 421 my_substring += ' (irregular range)' 422 sublist.append(my_substring) 423 424 my_string += '\n' + '\n'.join(sublist) 425 else: 426 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 427 my_string_list.append(my_string) 428 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
430 def is_zero_within_error(self, sigma=1): 431 """Checks whether the observable is zero within 'sigma' standard errors. 432 433 Parameters 434 ---------- 435 sigma : int 436 Number of standard errors used for the check. 437 438 Works only properly when the gamma method was run. 439 """ 440 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
442 def is_zero(self, atol=1e-10): 443 """Checks whether the observable is zero within a given tolerance. 444 445 Parameters 446 ---------- 447 atol : float 448 Absolute tolerance (for details see numpy documentation). 449 """ 450 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
452 def plot_tauint(self, save=None): 453 """Plot integrated autocorrelation time for each ensemble. 454 455 Parameters 456 ---------- 457 save : str 458 saves the figure to a file named 'save' if. 459 """ 460 if not hasattr(self, 'e_dvalue'): 461 raise Exception('Run the gamma method first.') 462 463 for e, e_name in enumerate(self.mc_names): 464 fig = plt.figure() 465 plt.xlabel(r'$W$') 466 plt.ylabel(r'$\tau_\mathrm{int}$') 467 length = int(len(self.e_n_tauint[e_name])) 468 if self.tau_exp[e_name] > 0: 469 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 470 x_help = np.arange(2 * self.tau_exp[e_name]) 471 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 472 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 473 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 474 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 475 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 476 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 477 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 478 else: 479 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 480 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 481 482 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 483 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 484 plt.legend() 485 plt.xlim(-0.5, xmax) 486 ylim = plt.ylim() 487 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 488 plt.draw() 489 if save: 490 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
492 def plot_rho(self, save=None): 493 """Plot normalized autocorrelation function time for each ensemble. 494 495 Parameters 496 ---------- 497 save : str 498 saves the figure to a file named 'save' if. 499 """ 500 if not hasattr(self, 'e_dvalue'): 501 raise Exception('Run the gamma method first.') 502 for e, e_name in enumerate(self.mc_names): 503 fig = plt.figure() 504 plt.xlabel('W') 505 plt.ylabel('rho') 506 length = int(len(self.e_drho[e_name])) 507 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 508 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 509 if self.tau_exp[e_name] > 0: 510 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 511 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 512 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 513 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 514 else: 515 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 516 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 517 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 518 plt.xlim(-0.5, xmax) 519 plt.draw() 520 if save: 521 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
523 def plot_rep_dist(self): 524 """Plot replica distribution for each ensemble with more than one replicum.""" 525 if not hasattr(self, 'e_dvalue'): 526 raise Exception('Run the gamma method first.') 527 for e, e_name in enumerate(self.mc_names): 528 if len(self.e_content[e_name]) == 1: 529 print('No replica distribution for a single replicum (', e_name, ')') 530 continue 531 r_length = [] 532 sub_r_mean = 0 533 for r, r_name in enumerate(self.e_content[e_name]): 534 r_length.append(len(self.deltas[r_name])) 535 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 536 e_N = np.sum(r_length) 537 sub_r_mean /= e_N 538 arr = np.zeros(len(self.e_content[e_name])) 539 for r, r_name in enumerate(self.e_content[e_name]): 540 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 541 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 542 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 543 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
545 def plot_history(self, expand=True): 546 """Plot derived Monte Carlo history for each ensemble 547 548 Parameters 549 ---------- 550 expand : bool 551 show expanded history for irregular Monte Carlo chains (default: True). 552 """ 553 for e, e_name in enumerate(self.mc_names): 554 plt.figure() 555 r_length = [] 556 tmp = [] 557 tmp_expanded = [] 558 for r, r_name in enumerate(self.e_content[e_name]): 559 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 560 if expand: 561 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 562 r_length.append(len(tmp_expanded[-1])) 563 else: 564 r_length.append(len(tmp[-1])) 565 e_N = np.sum(r_length) 566 x = np.arange(e_N) 567 y_test = np.concatenate(tmp, axis=0) 568 if expand: 569 y = np.concatenate(tmp_expanded, axis=0) 570 else: 571 y = y_test 572 plt.errorbar(x, y, fmt='.', markersize=3) 573 plt.xlim(-0.5, e_N - 0.5) 574 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 575 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
577 def plot_piechart(self, save=None): 578 """Plot piechart which shows the fractional contribution of each 579 ensemble to the error and returns a dictionary containing the fractions. 580 581 Parameters 582 ---------- 583 save : str 584 saves the figure to a file named 'save' if. 585 """ 586 if not hasattr(self, 'e_dvalue'): 587 raise Exception('Run the gamma method first.') 588 if np.isclose(0.0, self._dvalue, atol=1e-15): 589 raise Exception('Error is 0.0') 590 labels = self.e_names 591 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 592 fig1, ax1 = plt.subplots() 593 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 594 ax1.axis('equal') 595 plt.draw() 596 if save: 597 fig1.savefig(save) 598 599 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
601 def dump(self, filename, datatype="json.gz", description="", **kwargs): 602 """Dump the Obs to a file 'name' of chosen format. 603 604 Parameters 605 ---------- 606 filename : str 607 name of the file to be saved. 608 datatype : str 609 Format of the exported file. Supported formats include 610 "json.gz" and "pickle" 611 description : str 612 Description for output file, only relevant for json.gz format. 613 path : str 614 specifies a custom path for the file (default '.') 615 """ 616 if 'path' in kwargs: 617 file_name = kwargs.get('path') + '/' + filename 618 else: 619 file_name = filename 620 621 if datatype == "json.gz": 622 from .input.json import dump_to_json 623 dump_to_json([self], file_name, description=description) 624 elif datatype == "pickle": 625 with open(file_name + '.p', 'wb') as fb: 626 pickle.dump(self, fb) 627 else: 628 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
630 def export_jackknife(self): 631 """Export jackknife samples from the Obs 632 633 Returns 634 ------- 635 numpy.ndarray 636 Returns a numpy array of length N + 1 where N is the number of samples 637 for the given ensemble and replicum. The zeroth entry of the array contains 638 the mean value of the Obs, entries 1 to N contain the N jackknife samples 639 derived from the Obs. The current implementation only works for observables 640 defined on exactly one ensemble and replicum. The derived jackknife samples 641 should agree with samples from a full jackknife analysis up to O(1/N). 642 """ 643 644 if len(self.names) != 1: 645 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 646 647 name = self.names[0] 648 full_data = self.deltas[name] + self.r_values[name] 649 n = full_data.size 650 mean = self.value 651 tmp_jacks = np.zeros(n + 1) 652 tmp_jacks[0] = mean 653 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 654 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
827class CObs: 828 """Class for a complex valued observable.""" 829 __slots__ = ['_real', '_imag', 'tag'] 830 831 def __init__(self, real, imag=0.0): 832 self._real = real 833 self._imag = imag 834 self.tag = None 835 836 @property 837 def real(self): 838 return self._real 839 840 @property 841 def imag(self): 842 return self._imag 843 844 def gamma_method(self, **kwargs): 845 """Executes the gamma_method for the real and the imaginary part.""" 846 if isinstance(self.real, Obs): 847 self.real.gamma_method(**kwargs) 848 if isinstance(self.imag, Obs): 849 self.imag.gamma_method(**kwargs) 850 851 def is_zero(self): 852 """Checks whether both real and imaginary part are zero within machine precision.""" 853 return self.real == 0.0 and self.imag == 0.0 854 855 def conjugate(self): 856 return CObs(self.real, -self.imag) 857 858 def __add__(self, other): 859 if isinstance(other, np.ndarray): 860 return other + self 861 elif hasattr(other, 'real') and hasattr(other, 'imag'): 862 return CObs(self.real + other.real, 863 self.imag + other.imag) 864 else: 865 return CObs(self.real + other, self.imag) 866 867 def __radd__(self, y): 868 return self + y 869 870 def __sub__(self, other): 871 if isinstance(other, np.ndarray): 872 return -1 * (other - self) 873 elif hasattr(other, 'real') and hasattr(other, 'imag'): 874 return CObs(self.real - other.real, self.imag - other.imag) 875 else: 876 return CObs(self.real - other, self.imag) 877 878 def __rsub__(self, other): 879 return -1 * (self - other) 880 881 def __mul__(self, other): 882 if isinstance(other, np.ndarray): 883 return other * self 884 elif hasattr(other, 'real') and hasattr(other, 'imag'): 885 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 886 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 887 [self.real, other.real, self.imag, other.imag], 888 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 889 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 890 [self.real, other.real, self.imag, other.imag], 891 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 892 elif getattr(other, 'imag', 0) != 0: 893 return CObs(self.real * other.real - self.imag * other.imag, 894 self.imag * other.real + self.real * other.imag) 895 else: 896 return CObs(self.real * other.real, self.imag * other.real) 897 else: 898 return CObs(self.real * other, self.imag * other) 899 900 def __rmul__(self, other): 901 return self * other 902 903 def __truediv__(self, other): 904 if isinstance(other, np.ndarray): 905 return 1 / (other / self) 906 elif hasattr(other, 'real') and hasattr(other, 'imag'): 907 r = other.real ** 2 + other.imag ** 2 908 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 909 else: 910 return CObs(self.real / other, self.imag / other) 911 912 def __rtruediv__(self, other): 913 r = self.real ** 2 + self.imag ** 2 914 if hasattr(other, 'real') and hasattr(other, 'imag'): 915 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 916 else: 917 return CObs(self.real * other / r, -self.imag * other / r) 918 919 def __abs__(self): 920 return np.sqrt(self.real**2 + self.imag**2) 921 922 def __pos__(self): 923 return self 924 925 def __neg__(self): 926 return -1 * self 927 928 def __eq__(self, other): 929 return self.real == other.real and self.imag == other.imag 930 931 def __str__(self): 932 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 933 934 def __repr__(self): 935 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
844 def gamma_method(self, **kwargs): 845 """Executes the gamma_method for the real and the imaginary part.""" 846 if isinstance(self.real, Obs): 847 self.real.gamma_method(**kwargs) 848 if isinstance(self.imag, Obs): 849 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1104def derived_observable(func, data, array_mode=False, **kwargs): 1105 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1106 1107 Parameters 1108 ---------- 1109 func : object 1110 arbitrary function of the form func(data, **kwargs). For the 1111 automatic differentiation to work, all numpy functions have to have 1112 the autograd wrapper (use 'import autograd.numpy as anp'). 1113 data : list 1114 list of Obs, e.g. [obs1, obs2, obs3]. 1115 num_grad : bool 1116 if True, numerical derivatives are used instead of autograd 1117 (default False). To control the numerical differentiation the 1118 kwargs of numdifftools.step_generators.MaxStepGenerator 1119 can be used. 1120 man_grad : list 1121 manually supply a list or an array which contains the jacobian 1122 of func. Use cautiously, supplying the wrong derivative will 1123 not be intercepted. 1124 1125 Notes 1126 ----- 1127 For simple mathematical operations it can be practical to use anonymous 1128 functions. For the ratio of two observables one can e.g. use 1129 1130 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1131 """ 1132 1133 data = np.asarray(data) 1134 raveled_data = data.ravel() 1135 1136 # Workaround for matrix operations containing non Obs data 1137 if not all(isinstance(x, Obs) for x in raveled_data): 1138 for i in range(len(raveled_data)): 1139 if isinstance(raveled_data[i], (int, float)): 1140 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1141 1142 allcov = {} 1143 for o in raveled_data: 1144 for name in o.cov_names: 1145 if name in allcov: 1146 if not np.allclose(allcov[name], o.covobs[name].cov): 1147 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1148 else: 1149 allcov[name] = o.covobs[name].cov 1150 1151 n_obs = len(raveled_data) 1152 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1153 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1154 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1155 1156 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1157 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1158 1159 if data.ndim == 1: 1160 values = np.array([o.value for o in data]) 1161 else: 1162 values = np.vectorize(lambda x: x.value)(data) 1163 1164 new_values = func(values, **kwargs) 1165 1166 multi = int(isinstance(new_values, np.ndarray)) 1167 1168 new_r_values = {} 1169 new_idl_d = {} 1170 for name in new_sample_names: 1171 idl = [] 1172 tmp_values = np.zeros(n_obs) 1173 for i, item in enumerate(raveled_data): 1174 tmp_values[i] = item.r_values.get(name, item.value) 1175 tmp_idl = item.idl.get(name) 1176 if tmp_idl is not None: 1177 idl.append(tmp_idl) 1178 if multi > 0: 1179 tmp_values = np.array(tmp_values).reshape(data.shape) 1180 new_r_values[name] = func(tmp_values, **kwargs) 1181 new_idl_d[name] = _merge_idx(idl) 1182 if not is_merged[name]: 1183 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1184 1185 if 'man_grad' in kwargs: 1186 deriv = np.asarray(kwargs.get('man_grad')) 1187 if new_values.shape + data.shape != deriv.shape: 1188 raise Exception('Manual derivative does not have correct shape.') 1189 elif kwargs.get('num_grad') is True: 1190 if multi > 0: 1191 raise Exception('Multi mode currently not supported for numerical derivative') 1192 options = { 1193 'base_step': 0.1, 1194 'step_ratio': 2.5} 1195 for key in options.keys(): 1196 kwarg = kwargs.get(key) 1197 if kwarg is not None: 1198 options[key] = kwarg 1199 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1200 if tmp_df.size == 1: 1201 deriv = np.array([tmp_df.real]) 1202 else: 1203 deriv = tmp_df.real 1204 else: 1205 deriv = jacobian(func)(values, **kwargs) 1206 1207 final_result = np.zeros(new_values.shape, dtype=object) 1208 1209 if array_mode is True: 1210 1211 class _Zero_grad(): 1212 def __init__(self, N): 1213 self.grad = np.zeros((N, 1)) 1214 1215 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1216 d_extracted = {} 1217 g_extracted = {} 1218 for name in new_sample_names: 1219 d_extracted[name] = [] 1220 ens_length = len(new_idl_d[name]) 1221 for i_dat, dat in enumerate(data): 1222 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1223 for name in new_cov_names: 1224 g_extracted[name] = [] 1225 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1226 for i_dat, dat in enumerate(data): 1227 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1228 1229 for i_val, new_val in np.ndenumerate(new_values): 1230 new_deltas = {} 1231 new_grad = {} 1232 if array_mode is True: 1233 for name in new_sample_names: 1234 ens_length = d_extracted[name][0].shape[-1] 1235 new_deltas[name] = np.zeros(ens_length) 1236 for i_dat, dat in enumerate(d_extracted[name]): 1237 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1238 for name in new_cov_names: 1239 new_grad[name] = 0 1240 for i_dat, dat in enumerate(g_extracted[name]): 1241 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1242 else: 1243 for j_obs, obs in np.ndenumerate(data): 1244 for name in obs.names: 1245 if name in obs.cov_names: 1246 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1247 else: 1248 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1249 1250 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1251 1252 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1253 raise Exception('The same name has been used for deltas and covobs!') 1254 new_samples = [] 1255 new_means = [] 1256 new_idl = [] 1257 new_names_obs = [] 1258 for name in new_names: 1259 if name not in new_covobs: 1260 if is_merged[name]: 1261 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1262 else: 1263 filtered_deltas = new_deltas[name] 1264 filtered_idl_d = new_idl_d[name] 1265 1266 new_samples.append(filtered_deltas) 1267 new_idl.append(filtered_idl_d) 1268 new_means.append(new_r_values[name][i_val]) 1269 new_names_obs.append(name) 1270 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1271 for name in new_covobs: 1272 final_result[i_val].names.append(name) 1273 final_result[i_val]._covobs = new_covobs 1274 final_result[i_val]._value = new_val 1275 final_result[i_val].is_merged = is_merged 1276 final_result[i_val].reweighted = reweighted 1277 1278 if multi == 0: 1279 final_result = final_result.item() 1280 1281 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1321def reweight(weight, obs, **kwargs): 1322 """Reweight a list of observables. 1323 1324 Parameters 1325 ---------- 1326 weight : Obs 1327 Reweighting factor. An Observable that has to be defined on a superset of the 1328 configurations in obs[i].idl for all i. 1329 obs : list 1330 list of Obs, e.g. [obs1, obs2, obs3]. 1331 all_configs : bool 1332 if True, the reweighted observables are normalized by the average of 1333 the reweighting factor on all configurations in weight.idl and not 1334 on the configurations in obs[i].idl. 1335 """ 1336 result = [] 1337 for i in range(len(obs)): 1338 if len(obs[i].cov_names): 1339 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1340 if not set(obs[i].names).issubset(weight.names): 1341 raise Exception('Error: Ensembles do not fit') 1342 for name in obs[i].names: 1343 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1344 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1345 new_samples = [] 1346 w_deltas = {} 1347 for name in sorted(obs[i].names): 1348 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1349 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1350 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1351 1352 if kwargs.get('all_configs'): 1353 new_weight = weight 1354 else: 1355 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1356 1357 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1358 result[-1].reweighted = True 1359 result[-1].is_merged = obs[i].is_merged 1360 1361 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
1364def correlate(obs_a, obs_b): 1365 """Correlate two observables. 1366 1367 Parameters 1368 ---------- 1369 obs_a : Obs 1370 First observable 1371 obs_b : Obs 1372 Second observable 1373 1374 Notes 1375 ----- 1376 Keep in mind to only correlate primary observables which have not been reweighted 1377 yet. The reweighting has to be applied after correlating the observables. 1378 Currently only works if ensembles are identical (this is not strictly necessary). 1379 """ 1380 1381 if sorted(obs_a.names) != sorted(obs_b.names): 1382 raise Exception('Ensembles do not fit') 1383 if len(obs_a.cov_names) or len(obs_b.cov_names): 1384 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1385 for name in obs_a.names: 1386 if obs_a.shape[name] != obs_b.shape[name]: 1387 raise Exception('Shapes of ensemble', name, 'do not fit') 1388 if obs_a.idl[name] != obs_b.idl[name]: 1389 raise Exception('idl of ensemble', name, 'do not fit') 1390 1391 if obs_a.reweighted is True: 1392 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1393 if obs_b.reweighted is True: 1394 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1395 1396 new_samples = [] 1397 new_idl = [] 1398 for name in sorted(obs_a.names): 1399 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1400 new_idl.append(obs_a.idl[name]) 1401 1402 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1403 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1404 o.reweighted = obs_a.reweighted or obs_b.reweighted 1405 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1408def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1409 r'''Calculates the error covariance matrix of a set of observables. 1410 1411 The gamma method has to be applied first to all observables. 1412 1413 Parameters 1414 ---------- 1415 obs : list or numpy.ndarray 1416 List or one dimensional array of Obs 1417 visualize : bool 1418 If True plots the corresponding normalized correlation matrix (default False). 1419 correlation : bool 1420 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1421 smooth : None or int 1422 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1423 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1424 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1425 small ones. 1426 1427 Notes 1428 ----- 1429 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1430 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1431 in the absence of autocorrelation. 1432 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1433 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1434 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1435 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1436 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1437 ''' 1438 1439 length = len(obs) 1440 1441 max_samples = np.max([o.N for o in obs]) 1442 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1443 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1444 1445 cov = np.zeros((length, length)) 1446 for i in range(length): 1447 for j in range(i, length): 1448 cov[i, j] = _covariance_element(obs[i], obs[j]) 1449 cov = cov + cov.T - np.diag(np.diag(cov)) 1450 1451 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1452 1453 if isinstance(smooth, int): 1454 corr = _smooth_eigenvalues(corr, smooth) 1455 1456 if visualize: 1457 plt.matshow(corr, vmin=-1, vmax=1) 1458 plt.set_cmap('RdBu') 1459 plt.colorbar() 1460 plt.draw() 1461 1462 if correlation is True: 1463 return corr 1464 1465 errors = [o.dvalue for o in obs] 1466 cov = np.diag(errors) @ corr @ np.diag(errors) 1467 1468 eigenvalues = np.linalg.eigh(cov)[0] 1469 if not np.all(eigenvalues >= 0): 1470 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1471 1472 return cov
Calculates the error covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1552def import_jackknife(jacks, name, idl=None): 1553 """Imports jackknife samples and returns an Obs 1554 1555 Parameters 1556 ---------- 1557 jacks : numpy.ndarray 1558 numpy array containing the mean value as zeroth entry and 1559 the N jackknife samples as first to Nth entry. 1560 name : str 1561 name of the ensemble the samples are defined on. 1562 """ 1563 length = len(jacks) - 1 1564 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1565 samples = jacks[1:] @ prj 1566 mean = np.mean(samples) 1567 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1568 new_obs._value = jacks[0] 1569 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1572def merge_obs(list_of_obs): 1573 """Combine all observables in list_of_obs into one new observable 1574 1575 Parameters 1576 ---------- 1577 list_of_obs : list 1578 list of the Obs object to be combined 1579 1580 Notes 1581 ----- 1582 It is not possible to combine obs which are based on the same replicum 1583 """ 1584 replist = [item for obs in list_of_obs for item in obs.names] 1585 if (len(replist) == len(set(replist))) is False: 1586 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1587 if any([len(o.cov_names) for o in list_of_obs]): 1588 raise Exception('Not possible to merge data that contains covobs!') 1589 new_dict = {} 1590 idl_dict = {} 1591 for o in list_of_obs: 1592 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1593 for key in set(o.deltas) | set(o.r_values)}) 1594 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1595 1596 names = sorted(new_dict.keys()) 1597 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1598 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1599 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1600 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1603def cov_Obs(means, cov, name, grad=None): 1604 """Create an Obs based on mean(s) and a covariance matrix 1605 1606 Parameters 1607 ---------- 1608 mean : list of floats or float 1609 N mean value(s) of the new Obs 1610 cov : list or array 1611 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1612 name : str 1613 identifier for the covariance matrix 1614 grad : list or array 1615 Gradient of the Covobs wrt. the means belonging to cov. 1616 """ 1617 1618 def covobs_to_obs(co): 1619 """Make an Obs out of a Covobs 1620 1621 Parameters 1622 ---------- 1623 co : Covobs 1624 Covobs to be embedded into the Obs 1625 """ 1626 o = Obs([], [], means=[]) 1627 o._value = co.value 1628 o.names.append(co.name) 1629 o._covobs[co.name] = co 1630 o._dvalue = np.sqrt(co.errsq()) 1631 return o 1632 1633 ol = [] 1634 if isinstance(means, (float, int)): 1635 means = [means] 1636 1637 for i in range(len(means)): 1638 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1639 if ol[0].covobs[name].N != len(means): 1640 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1641 if len(ol) == 1: 1642 return ol[0] 1643 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.