pyerrors.obs
1import warnings 2import pickle 3from math import gcd 4from functools import reduce 5import numpy as np 6import autograd.numpy as anp # Thinly-wrapped numpy 7from autograd import jacobian 8import matplotlib.pyplot as plt 9from scipy.stats import skew, skewtest, kurtosis, kurtosistest 10import numdifftools as nd 11from itertools import groupby 12from .covobs import Covobs 13 14 15class Obs: 16 """Class for a general observable. 17 18 Instances of Obs are the basic objects of a pyerrors error analysis. 19 They are initialized with a list which contains arrays of samples for 20 different ensembles/replica and another list of same length which contains 21 the names of the ensembles/replica. Mathematical operations can be 22 performed on instances. The result is another instance of Obs. The error of 23 an instance can be computed with the gamma_method. Also contains additional 24 methods for output and visualization of the error calculation. 25 26 Attributes 27 ---------- 28 S_global : float 29 Standard value for S (default 2.0) 30 S_dict : dict 31 Dictionary for S values. If an entry for a given ensemble 32 exists this overwrites the standard value for that ensemble. 33 tau_exp_global : float 34 Standard value for tau_exp (default 0.0) 35 tau_exp_dict : dict 36 Dictionary for tau_exp values. If an entry for a given ensemble exists 37 this overwrites the standard value for that ensemble. 38 N_sigma_global : float 39 Standard value for N_sigma (default 1.0) 40 N_sigma_dict : dict 41 Dictionary for N_sigma values. If an entry for a given ensemble exists 42 this overwrites the standard value for that ensemble. 43 """ 44 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 45 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 46 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 47 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 48 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 49 50 S_global = 2.0 51 S_dict = {} 52 tau_exp_global = 0.0 53 tau_exp_dict = {} 54 N_sigma_global = 1.0 55 N_sigma_dict = {} 56 filter_eps = 1e-10 57 58 def __init__(self, samples, names, idl=None, **kwargs): 59 """ Initialize Obs object. 60 61 Parameters 62 ---------- 63 samples : list 64 list of numpy arrays containing the Monte Carlo samples 65 names : list 66 list of strings labeling the individual samples 67 idl : list, optional 68 list of ranges or lists on which the samples are defined 69 """ 70 71 if kwargs.get("means") is None and len(samples): 72 if len(samples) != len(names): 73 raise Exception('Length of samples and names incompatible.') 74 if idl is not None: 75 if len(idl) != len(names): 76 raise Exception('Length of idl incompatible with samples and names.') 77 name_length = len(names) 78 if name_length > 1: 79 if name_length != len(set(names)): 80 raise Exception('names are not unique.') 81 if not all(isinstance(x, str) for x in names): 82 raise TypeError('All names have to be strings.') 83 else: 84 if not isinstance(names[0], str): 85 raise TypeError('All names have to be strings.') 86 if min(len(x) for x in samples) <= 4: 87 raise Exception('Samples have to have at least 5 entries.') 88 89 self.names = sorted(names) 90 self.shape = {} 91 self.r_values = {} 92 self.deltas = {} 93 self._covobs = {} 94 95 self._value = 0 96 self.N = 0 97 self.is_merged = {} 98 self.idl = {} 99 if idl is not None: 100 for name, idx in sorted(zip(names, idl)): 101 if isinstance(idx, range): 102 self.idl[name] = idx 103 elif isinstance(idx, (list, np.ndarray)): 104 dc = np.unique(np.diff(idx)) 105 if np.any(dc < 0): 106 raise Exception("Unsorted idx for idl[%s]" % (name)) 107 if len(dc) == 1: 108 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 109 else: 110 self.idl[name] = list(idx) 111 else: 112 raise Exception('incompatible type for idl[%s].' % (name)) 113 else: 114 for name, sample in sorted(zip(names, samples)): 115 self.idl[name] = range(1, len(sample) + 1) 116 117 if kwargs.get("means") is not None: 118 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 119 self.shape[name] = len(self.idl[name]) 120 self.N += self.shape[name] 121 self.r_values[name] = mean 122 self.deltas[name] = sample 123 else: 124 for name, sample in sorted(zip(names, samples)): 125 self.shape[name] = len(self.idl[name]) 126 self.N += self.shape[name] 127 if len(sample) != self.shape[name]: 128 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 129 self.r_values[name] = np.mean(sample) 130 self.deltas[name] = sample - self.r_values[name] 131 self._value += self.shape[name] * self.r_values[name] 132 self._value /= self.N 133 134 self._dvalue = 0.0 135 self.ddvalue = 0.0 136 self.reweighted = False 137 138 self.tag = None 139 140 @property 141 def value(self): 142 return self._value 143 144 @property 145 def dvalue(self): 146 return self._dvalue 147 148 @property 149 def e_names(self): 150 return sorted(set([o.split('|')[0] for o in self.names])) 151 152 @property 153 def cov_names(self): 154 return sorted(set([o for o in self.covobs.keys()])) 155 156 @property 157 def mc_names(self): 158 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 159 160 @property 161 def e_content(self): 162 res = {} 163 for e, e_name in enumerate(self.e_names): 164 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 165 if e_name in self.names: 166 res[e_name].append(e_name) 167 return res 168 169 @property 170 def covobs(self): 171 return self._covobs 172 173 def gamma_method(self, **kwargs): 174 """Estimate the error and related properties of the Obs. 175 176 Parameters 177 ---------- 178 S : float 179 specifies a custom value for the parameter S (default 2.0). 180 If set to 0 it is assumed that the data exhibits no 181 autocorrelation. In this case the error estimates coincides 182 with the sample standard error. 183 tau_exp : float 184 positive value triggers the critical slowing down analysis 185 (default 0.0). 186 N_sigma : float 187 number of standard deviations from zero until the tail is 188 attached to the autocorrelation function (default 1). 189 fft : bool 190 determines whether the fft algorithm is used for the computation 191 of the autocorrelation function (default True) 192 """ 193 194 e_content = self.e_content 195 self.e_dvalue = {} 196 self.e_ddvalue = {} 197 self.e_tauint = {} 198 self.e_dtauint = {} 199 self.e_windowsize = {} 200 self.e_n_tauint = {} 201 self.e_n_dtauint = {} 202 e_gamma = {} 203 self.e_rho = {} 204 self.e_drho = {} 205 self._dvalue = 0 206 self.ddvalue = 0 207 208 self.S = {} 209 self.tau_exp = {} 210 self.N_sigma = {} 211 212 if kwargs.get('fft') is False: 213 fft = False 214 else: 215 fft = True 216 217 def _parse_kwarg(kwarg_name): 218 if kwarg_name in kwargs: 219 tmp = kwargs.get(kwarg_name) 220 if isinstance(tmp, (int, float)): 221 if tmp < 0: 222 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 223 for e, e_name in enumerate(self.e_names): 224 getattr(self, kwarg_name)[e_name] = tmp 225 else: 226 raise TypeError(kwarg_name + ' is not in proper format.') 227 else: 228 for e, e_name in enumerate(self.e_names): 229 if e_name in getattr(Obs, kwarg_name + '_dict'): 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 231 else: 232 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 233 234 _parse_kwarg('S') 235 _parse_kwarg('tau_exp') 236 _parse_kwarg('N_sigma') 237 238 for e, e_name in enumerate(self.mc_names): 239 r_length = [] 240 for r_name in e_content[e_name]: 241 if isinstance(self.idl[r_name], range): 242 r_length.append(len(self.idl[r_name])) 243 else: 244 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 245 246 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 247 w_max = max(r_length) // 2 248 e_gamma[e_name] = np.zeros(w_max) 249 self.e_rho[e_name] = np.zeros(w_max) 250 self.e_drho[e_name] = np.zeros(w_max) 251 252 for r_name in e_content[e_name]: 253 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 254 255 gamma_div = np.zeros(w_max) 256 for r_name in e_content[e_name]: 257 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 258 gamma_div[gamma_div < 1] = 1.0 259 e_gamma[e_name] /= gamma_div[:w_max] 260 261 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 262 self.e_tauint[e_name] = 0.5 263 self.e_dtauint[e_name] = 0.0 264 self.e_dvalue[e_name] = 0.0 265 self.e_ddvalue[e_name] = 0.0 266 self.e_windowsize[e_name] = 0 267 continue 268 269 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 270 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 271 # Make sure no entry of tauint is smaller than 0.5 272 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 273 # hep-lat/0306017 eq. (42) 274 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 275 self.e_n_dtauint[e_name][0] = 0.0 276 277 def _compute_drho(i): 278 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 279 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 280 281 _compute_drho(1) 282 if self.tau_exp[e_name] > 0: 283 texp = self.tau_exp[e_name] 284 # Critical slowing down analysis 285 if w_max // 2 <= 1: 286 raise Exception("Need at least 8 samples for tau_exp error analysis") 287 for n in range(1, w_max // 2): 288 _compute_drho(n + 1) 289 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 290 # Bias correction hep-lat/0306017 eq. (49) included 291 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 292 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 293 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 294 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 295 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 296 self.e_windowsize[e_name] = n 297 break 298 else: 299 if self.S[e_name] == 0.0: 300 self.e_tauint[e_name] = 0.5 301 self.e_dtauint[e_name] = 0.0 302 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 303 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 304 self.e_windowsize[e_name] = 0 305 else: 306 # Standard automatic windowing procedure 307 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 308 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 309 for n in range(1, w_max): 310 if n < w_max // 2 - 2: 311 _compute_drho(n + 1) 312 if g_w[n - 1] < 0 or n >= w_max - 1: 313 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 314 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 315 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 316 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 317 self.e_windowsize[e_name] = n 318 break 319 320 self._dvalue += self.e_dvalue[e_name] ** 2 321 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 322 323 for e_name in self.cov_names: 324 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 325 self.e_ddvalue[e_name] = 0 326 self._dvalue += self.e_dvalue[e_name]**2 327 328 self._dvalue = np.sqrt(self._dvalue) 329 if self._dvalue == 0.0: 330 self.ddvalue = 0.0 331 else: 332 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 333 return 334 335 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 336 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 337 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 338 339 Parameters 340 ---------- 341 deltas : list 342 List of fluctuations 343 idx : list 344 List or range of configurations on which the deltas are defined. 345 shape : int 346 Number of configurations in idx. 347 w_max : int 348 Upper bound for the summation window. 349 fft : bool 350 determines whether the fft algorithm is used for the computation 351 of the autocorrelation function. 352 """ 353 gamma = np.zeros(w_max) 354 deltas = _expand_deltas(deltas, idx, shape) 355 new_shape = len(deltas) 356 if fft: 357 max_gamma = min(new_shape, w_max) 358 # The padding for the fft has to be even 359 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 360 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 361 else: 362 for n in range(w_max): 363 if new_shape - n >= 0: 364 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 365 366 return gamma 367 368 def details(self, ens_content=True): 369 """Output detailed properties of the Obs. 370 371 Parameters 372 ---------- 373 ens_content : bool 374 print details about the ensembles and replica if true. 375 """ 376 if self.tag is not None: 377 print("Description:", self.tag) 378 if not hasattr(self, 'e_dvalue'): 379 print('Result\t %3.8e' % (self.value)) 380 else: 381 if self.value == 0.0: 382 percentage = np.nan 383 else: 384 percentage = np.abs(self._dvalue / self.value) * 100 385 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 386 if len(self.e_names) > 1: 387 print(' Ensemble errors:') 388 for e_name in self.mc_names: 389 if len(self.e_names) > 1: 390 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 391 if self.tau_exp[e_name] > 0: 392 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 393 else: 394 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 395 for e_name in self.cov_names: 396 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 397 if ens_content is True: 398 if len(self.e_names) == 1: 399 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 400 else: 401 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 402 my_string_list = [] 403 for key, value in sorted(self.e_content.items()): 404 if key not in self.covobs: 405 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 406 if len(value) == 1: 407 my_string += f': {self.shape[value[0]]} configurations' 408 if isinstance(self.idl[value[0]], range): 409 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 410 else: 411 my_string += ' (irregular range)' 412 else: 413 sublist = [] 414 for v in value: 415 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 416 my_substring += f': {self.shape[v]} configurations' 417 if isinstance(self.idl[v], range): 418 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 419 else: 420 my_substring += ' (irregular range)' 421 sublist.append(my_substring) 422 423 my_string += '\n' + '\n'.join(sublist) 424 else: 425 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 426 my_string_list.append(my_string) 427 print('\n'.join(my_string_list)) 428 429 def reweight(self, weight): 430 """Reweight the obs with given rewighting factors. 431 432 Parameters 433 ---------- 434 weight : Obs 435 Reweighting factor. An Observable that has to be defined on a superset of the 436 configurations in obs[i].idl for all i. 437 all_configs : bool 438 if True, the reweighted observables are normalized by the average of 439 the reweighting factor on all configurations in weight.idl and not 440 on the configurations in obs[i].idl. Default False. 441 """ 442 return reweight(weight, [self])[0] 443 444 def is_zero_within_error(self, sigma=1): 445 """Checks whether the observable is zero within 'sigma' standard errors. 446 447 Parameters 448 ---------- 449 sigma : int 450 Number of standard errors used for the check. 451 452 Works only properly when the gamma method was run. 453 """ 454 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 455 456 def is_zero(self, atol=1e-10): 457 """Checks whether the observable is zero within a given tolerance. 458 459 Parameters 460 ---------- 461 atol : float 462 Absolute tolerance (for details see numpy documentation). 463 """ 464 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 465 466 def plot_tauint(self, save=None): 467 """Plot integrated autocorrelation time for each ensemble. 468 469 Parameters 470 ---------- 471 save : str 472 saves the figure to a file named 'save' if. 473 """ 474 if not hasattr(self, 'e_dvalue'): 475 raise Exception('Run the gamma method first.') 476 477 for e, e_name in enumerate(self.mc_names): 478 fig = plt.figure() 479 plt.xlabel(r'$W$') 480 plt.ylabel(r'$\tau_\mathrm{int}$') 481 length = int(len(self.e_n_tauint[e_name])) 482 if self.tau_exp[e_name] > 0: 483 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 484 x_help = np.arange(2 * self.tau_exp[e_name]) 485 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 486 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 487 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 488 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 489 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 490 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 491 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 492 else: 493 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 494 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 495 496 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 497 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 498 plt.legend() 499 plt.xlim(-0.5, xmax) 500 ylim = plt.ylim() 501 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 502 plt.draw() 503 if save: 504 fig.savefig(save + "_" + str(e)) 505 506 def plot_rho(self, save=None): 507 """Plot normalized autocorrelation function time for each ensemble. 508 509 Parameters 510 ---------- 511 save : str 512 saves the figure to a file named 'save' if. 513 """ 514 if not hasattr(self, 'e_dvalue'): 515 raise Exception('Run the gamma method first.') 516 for e, e_name in enumerate(self.mc_names): 517 fig = plt.figure() 518 plt.xlabel('W') 519 plt.ylabel('rho') 520 length = int(len(self.e_drho[e_name])) 521 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 522 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 523 if self.tau_exp[e_name] > 0: 524 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 525 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 526 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 527 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 528 else: 529 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 530 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 531 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 532 plt.xlim(-0.5, xmax) 533 plt.draw() 534 if save: 535 fig.savefig(save + "_" + str(e)) 536 537 def plot_rep_dist(self): 538 """Plot replica distribution for each ensemble with more than one replicum.""" 539 if not hasattr(self, 'e_dvalue'): 540 raise Exception('Run the gamma method first.') 541 for e, e_name in enumerate(self.mc_names): 542 if len(self.e_content[e_name]) == 1: 543 print('No replica distribution for a single replicum (', e_name, ')') 544 continue 545 r_length = [] 546 sub_r_mean = 0 547 for r, r_name in enumerate(self.e_content[e_name]): 548 r_length.append(len(self.deltas[r_name])) 549 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 550 e_N = np.sum(r_length) 551 sub_r_mean /= e_N 552 arr = np.zeros(len(self.e_content[e_name])) 553 for r, r_name in enumerate(self.e_content[e_name]): 554 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 555 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 556 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 557 plt.draw() 558 559 def plot_history(self, expand=True): 560 """Plot derived Monte Carlo history for each ensemble 561 562 Parameters 563 ---------- 564 expand : bool 565 show expanded history for irregular Monte Carlo chains (default: True). 566 """ 567 for e, e_name in enumerate(self.mc_names): 568 plt.figure() 569 r_length = [] 570 tmp = [] 571 tmp_expanded = [] 572 for r, r_name in enumerate(self.e_content[e_name]): 573 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 574 if expand: 575 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 576 r_length.append(len(tmp_expanded[-1])) 577 else: 578 r_length.append(len(tmp[-1])) 579 e_N = np.sum(r_length) 580 x = np.arange(e_N) 581 y_test = np.concatenate(tmp, axis=0) 582 if expand: 583 y = np.concatenate(tmp_expanded, axis=0) 584 else: 585 y = y_test 586 plt.errorbar(x, y, fmt='.', markersize=3) 587 plt.xlim(-0.5, e_N - 0.5) 588 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 589 plt.draw() 590 591 def plot_piechart(self, save=None): 592 """Plot piechart which shows the fractional contribution of each 593 ensemble to the error and returns a dictionary containing the fractions. 594 595 Parameters 596 ---------- 597 save : str 598 saves the figure to a file named 'save' if. 599 """ 600 if not hasattr(self, 'e_dvalue'): 601 raise Exception('Run the gamma method first.') 602 if np.isclose(0.0, self._dvalue, atol=1e-15): 603 raise Exception('Error is 0.0') 604 labels = self.e_names 605 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 606 fig1, ax1 = plt.subplots() 607 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 608 ax1.axis('equal') 609 plt.draw() 610 if save: 611 fig1.savefig(save) 612 613 return dict(zip(self.e_names, sizes)) 614 615 def dump(self, filename, datatype="json.gz", description="", **kwargs): 616 """Dump the Obs to a file 'name' of chosen format. 617 618 Parameters 619 ---------- 620 filename : str 621 name of the file to be saved. 622 datatype : str 623 Format of the exported file. Supported formats include 624 "json.gz" and "pickle" 625 description : str 626 Description for output file, only relevant for json.gz format. 627 path : str 628 specifies a custom path for the file (default '.') 629 """ 630 if 'path' in kwargs: 631 file_name = kwargs.get('path') + '/' + filename 632 else: 633 file_name = filename 634 635 if datatype == "json.gz": 636 from .input.json import dump_to_json 637 dump_to_json([self], file_name, description=description) 638 elif datatype == "pickle": 639 with open(file_name + '.p', 'wb') as fb: 640 pickle.dump(self, fb) 641 else: 642 raise Exception("Unknown datatype " + str(datatype)) 643 644 def export_jackknife(self): 645 """Export jackknife samples from the Obs 646 647 Returns 648 ------- 649 numpy.ndarray 650 Returns a numpy array of length N + 1 where N is the number of samples 651 for the given ensemble and replicum. The zeroth entry of the array contains 652 the mean value of the Obs, entries 1 to N contain the N jackknife samples 653 derived from the Obs. The current implementation only works for observables 654 defined on exactly one ensemble and replicum. The derived jackknife samples 655 should agree with samples from a full jackknife analysis up to O(1/N). 656 """ 657 658 if len(self.names) != 1: 659 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 660 661 name = self.names[0] 662 full_data = self.deltas[name] + self.r_values[name] 663 n = full_data.size 664 mean = self.value 665 tmp_jacks = np.zeros(n + 1) 666 tmp_jacks[0] = mean 667 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 668 return tmp_jacks 669 670 def __float__(self): 671 return float(self.value) 672 673 def __repr__(self): 674 return 'Obs[' + str(self) + ']' 675 676 def __str__(self): 677 if self._dvalue == 0.0: 678 return str(self.value) 679 fexp = np.floor(np.log10(self._dvalue)) 680 if fexp < 0.0: 681 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 682 elif fexp == 0.0: 683 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 684 else: 685 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 686 687 # Overload comparisons 688 def __lt__(self, other): 689 return self.value < other 690 691 def __le__(self, other): 692 return self.value <= other 693 694 def __gt__(self, other): 695 return self.value > other 696 697 def __ge__(self, other): 698 return self.value >= other 699 700 def __eq__(self, other): 701 return (self - other).is_zero() 702 703 def __ne__(self, other): 704 return not (self - other).is_zero() 705 706 # Overload math operations 707 def __add__(self, y): 708 if isinstance(y, Obs): 709 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 710 else: 711 if isinstance(y, np.ndarray): 712 return np.array([self + o for o in y]) 713 elif y.__class__.__name__ in ['Corr', 'CObs']: 714 return NotImplemented 715 else: 716 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 717 718 def __radd__(self, y): 719 return self + y 720 721 def __mul__(self, y): 722 if isinstance(y, Obs): 723 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 724 else: 725 if isinstance(y, np.ndarray): 726 return np.array([self * o for o in y]) 727 elif isinstance(y, complex): 728 return CObs(self * y.real, self * y.imag) 729 elif y.__class__.__name__ in ['Corr', 'CObs']: 730 return NotImplemented 731 else: 732 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 733 734 def __rmul__(self, y): 735 return self * y 736 737 def __sub__(self, y): 738 if isinstance(y, Obs): 739 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 740 else: 741 if isinstance(y, np.ndarray): 742 return np.array([self - o for o in y]) 743 elif y.__class__.__name__ in ['Corr', 'CObs']: 744 return NotImplemented 745 else: 746 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 747 748 def __rsub__(self, y): 749 return -1 * (self - y) 750 751 def __pos__(self): 752 return self 753 754 def __neg__(self): 755 return -1 * self 756 757 def __truediv__(self, y): 758 if isinstance(y, Obs): 759 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 760 else: 761 if isinstance(y, np.ndarray): 762 return np.array([self / o for o in y]) 763 elif y.__class__.__name__ in ['Corr', 'CObs']: 764 return NotImplemented 765 else: 766 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 767 768 def __rtruediv__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 771 else: 772 if isinstance(y, np.ndarray): 773 return np.array([o / self for o in y]) 774 elif y.__class__.__name__ in ['Corr', 'CObs']: 775 return NotImplemented 776 else: 777 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 778 779 def __pow__(self, y): 780 if isinstance(y, Obs): 781 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 782 else: 783 return derived_observable(lambda x: x[0] ** y, [self]) 784 785 def __rpow__(self, y): 786 if isinstance(y, Obs): 787 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 788 else: 789 return derived_observable(lambda x: y ** x[0], [self]) 790 791 def __abs__(self): 792 return derived_observable(lambda x: anp.abs(x[0]), [self]) 793 794 # Overload numpy functions 795 def sqrt(self): 796 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 797 798 def log(self): 799 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 800 801 def exp(self): 802 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 803 804 def sin(self): 805 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 806 807 def cos(self): 808 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 809 810 def tan(self): 811 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 812 813 def arcsin(self): 814 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 815 816 def arccos(self): 817 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 818 819 def arctan(self): 820 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 821 822 def sinh(self): 823 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 824 825 def cosh(self): 826 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 827 828 def tanh(self): 829 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 830 831 def arcsinh(self): 832 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 833 834 def arccosh(self): 835 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 836 837 def arctanh(self): 838 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 839 840 841class CObs: 842 """Class for a complex valued observable.""" 843 __slots__ = ['_real', '_imag', 'tag'] 844 845 def __init__(self, real, imag=0.0): 846 self._real = real 847 self._imag = imag 848 self.tag = None 849 850 @property 851 def real(self): 852 return self._real 853 854 @property 855 def imag(self): 856 return self._imag 857 858 def gamma_method(self, **kwargs): 859 """Executes the gamma_method for the real and the imaginary part.""" 860 if isinstance(self.real, Obs): 861 self.real.gamma_method(**kwargs) 862 if isinstance(self.imag, Obs): 863 self.imag.gamma_method(**kwargs) 864 865 def is_zero(self): 866 """Checks whether both real and imaginary part are zero within machine precision.""" 867 return self.real == 0.0 and self.imag == 0.0 868 869 def conjugate(self): 870 return CObs(self.real, -self.imag) 871 872 def __add__(self, other): 873 if isinstance(other, np.ndarray): 874 return other + self 875 elif hasattr(other, 'real') and hasattr(other, 'imag'): 876 return CObs(self.real + other.real, 877 self.imag + other.imag) 878 else: 879 return CObs(self.real + other, self.imag) 880 881 def __radd__(self, y): 882 return self + y 883 884 def __sub__(self, other): 885 if isinstance(other, np.ndarray): 886 return -1 * (other - self) 887 elif hasattr(other, 'real') and hasattr(other, 'imag'): 888 return CObs(self.real - other.real, self.imag - other.imag) 889 else: 890 return CObs(self.real - other, self.imag) 891 892 def __rsub__(self, other): 893 return -1 * (self - other) 894 895 def __mul__(self, other): 896 if isinstance(other, np.ndarray): 897 return other * self 898 elif hasattr(other, 'real') and hasattr(other, 'imag'): 899 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 900 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 901 [self.real, other.real, self.imag, other.imag], 902 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 903 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 904 [self.real, other.real, self.imag, other.imag], 905 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 906 elif getattr(other, 'imag', 0) != 0: 907 return CObs(self.real * other.real - self.imag * other.imag, 908 self.imag * other.real + self.real * other.imag) 909 else: 910 return CObs(self.real * other.real, self.imag * other.real) 911 else: 912 return CObs(self.real * other, self.imag * other) 913 914 def __rmul__(self, other): 915 return self * other 916 917 def __truediv__(self, other): 918 if isinstance(other, np.ndarray): 919 return 1 / (other / self) 920 elif hasattr(other, 'real') and hasattr(other, 'imag'): 921 r = other.real ** 2 + other.imag ** 2 922 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 923 else: 924 return CObs(self.real / other, self.imag / other) 925 926 def __rtruediv__(self, other): 927 r = self.real ** 2 + self.imag ** 2 928 if hasattr(other, 'real') and hasattr(other, 'imag'): 929 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 930 else: 931 return CObs(self.real * other / r, -self.imag * other / r) 932 933 def __abs__(self): 934 return np.sqrt(self.real**2 + self.imag**2) 935 936 def __pos__(self): 937 return self 938 939 def __neg__(self): 940 return -1 * self 941 942 def __eq__(self, other): 943 return self.real == other.real and self.imag == other.imag 944 945 def __str__(self): 946 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 947 948 def __repr__(self): 949 return 'CObs[' + str(self) + ']' 950 951 952def _expand_deltas(deltas, idx, shape): 953 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 954 If idx is of type range, the deltas are not changed 955 956 Parameters 957 ---------- 958 deltas : list 959 List of fluctuations 960 idx : list 961 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 962 shape : int 963 Number of configs in idx. 964 """ 965 if isinstance(idx, range): 966 return deltas 967 else: 968 ret = np.zeros(idx[-1] - idx[0] + 1) 969 for i in range(shape): 970 ret[idx[i] - idx[0]] = deltas[i] 971 return ret 972 973 974def _merge_idx(idl): 975 """Returns the union of all lists in idl as sorted list 976 977 Parameters 978 ---------- 979 idl : list 980 List of lists or ranges. 981 """ 982 983 # Use groupby to efficiently check whether all elements of idl are identical 984 try: 985 g = groupby(idl) 986 if next(g, True) and not next(g, False): 987 return idl[0] 988 except Exception: 989 pass 990 991 if np.all([type(idx) is range for idx in idl]): 992 if len(set([idx[0] for idx in idl])) == 1: 993 idstart = min([idx.start for idx in idl]) 994 idstop = max([idx.stop for idx in idl]) 995 idstep = min([idx.step for idx in idl]) 996 return range(idstart, idstop, idstep) 997 998 return sorted(set().union(*idl)) 999 1000 1001def _intersection_idx(idl): 1002 """Returns the intersection of all lists in idl as sorted list 1003 1004 Parameters 1005 ---------- 1006 idl : list 1007 List of lists or ranges. 1008 """ 1009 1010 def _lcm(*args): 1011 """Returns the lowest common multiple of args. 1012 1013 From python 3.9 onwards the math library contains an lcm function.""" 1014 return reduce(lambda a, b: a * b // gcd(a, b), args) 1015 1016 # Use groupby to efficiently check whether all elements of idl are identical 1017 try: 1018 g = groupby(idl) 1019 if next(g, True) and not next(g, False): 1020 return idl[0] 1021 except Exception: 1022 pass 1023 1024 if np.all([type(idx) is range for idx in idl]): 1025 if len(set([idx[0] for idx in idl])) == 1: 1026 idstart = max([idx.start for idx in idl]) 1027 idstop = min([idx.stop for idx in idl]) 1028 idstep = _lcm(*[idx.step for idx in idl]) 1029 return range(idstart, idstop, idstep) 1030 1031 return sorted(set.intersection(*[set(o) for o in idl])) 1032 1033 1034def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 1035 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 1036 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 1037 common divisor of the step sizes is used as new step size. 1038 1039 Parameters 1040 ---------- 1041 deltas : list 1042 List of fluctuations 1043 idx : list 1044 List or range of configs on which the deltas are defined. 1045 Has to be a subset of new_idx and has to be sorted in ascending order. 1046 shape : list 1047 Number of configs in idx. 1048 new_idx : list 1049 List of configs that defines the new range, has to be sorted in ascending order. 1050 """ 1051 1052 if type(idx) is range and type(new_idx) is range: 1053 if idx == new_idx: 1054 return deltas 1055 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1056 for i in range(shape): 1057 ret[idx[i] - new_idx[0]] = deltas[i] 1058 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1059 1060 1061def _collapse_deltas_for_merge(deltas, idx, shape, new_idx): 1062 """Collapse deltas defined on idx to the list of configs that is defined by new_idx. 1063 If idx and new_idx are of type range, the smallest 1064 common divisor of the step sizes is used as new step size. 1065 1066 Parameters 1067 ---------- 1068 deltas : list 1069 List of fluctuations 1070 idx : list 1071 List or range of configs on which the deltas are defined. 1072 Has to be a subset of new_idx and has to be sorted in ascending order. 1073 shape : list 1074 Number of configs in idx. 1075 new_idx : list 1076 List of configs that defines the new range, has to be sorted in ascending order. 1077 """ 1078 1079 if type(idx) is range and type(new_idx) is range: 1080 if idx == new_idx: 1081 return deltas 1082 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1083 for i in range(shape): 1084 if idx[i] in new_idx: 1085 ret[idx[i] - new_idx[0]] = deltas[i] 1086 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1087 1088 1089def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1090 """Filter out all configurations with vanishing fluctuation such that they do not 1091 contribute to the error estimate anymore. Returns the new deltas and 1092 idx according to the filtering. 1093 A fluctuation is considered to be vanishing, if it is smaller than eps times 1094 the mean of the absolute values of all deltas in one list. 1095 1096 Parameters 1097 ---------- 1098 deltas : list 1099 List of fluctuations 1100 idx : list 1101 List or ranges of configs on which the deltas are defined. 1102 eps : float 1103 Prefactor that enters the filter criterion. 1104 """ 1105 new_deltas = [] 1106 new_idx = [] 1107 maxd = np.mean(np.fabs(deltas)) 1108 for i in range(len(deltas)): 1109 if abs(deltas[i]) > eps * maxd: 1110 new_deltas.append(deltas[i]) 1111 new_idx.append(idx[i]) 1112 if new_idx: 1113 return np.array(new_deltas), new_idx 1114 else: 1115 return deltas, idx 1116 1117 1118def derived_observable(func, data, array_mode=False, **kwargs): 1119 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1120 1121 Parameters 1122 ---------- 1123 func : object 1124 arbitrary function of the form func(data, **kwargs). For the 1125 automatic differentiation to work, all numpy functions have to have 1126 the autograd wrapper (use 'import autograd.numpy as anp'). 1127 data : list 1128 list of Obs, e.g. [obs1, obs2, obs3]. 1129 num_grad : bool 1130 if True, numerical derivatives are used instead of autograd 1131 (default False). To control the numerical differentiation the 1132 kwargs of numdifftools.step_generators.MaxStepGenerator 1133 can be used. 1134 man_grad : list 1135 manually supply a list or an array which contains the jacobian 1136 of func. Use cautiously, supplying the wrong derivative will 1137 not be intercepted. 1138 1139 Notes 1140 ----- 1141 For simple mathematical operations it can be practical to use anonymous 1142 functions. For the ratio of two observables one can e.g. use 1143 1144 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1145 """ 1146 1147 data = np.asarray(data) 1148 raveled_data = data.ravel() 1149 1150 # Workaround for matrix operations containing non Obs data 1151 if not all(isinstance(x, Obs) for x in raveled_data): 1152 for i in range(len(raveled_data)): 1153 if isinstance(raveled_data[i], (int, float)): 1154 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1155 1156 allcov = {} 1157 for o in raveled_data: 1158 for name in o.cov_names: 1159 if name in allcov: 1160 if not np.allclose(allcov[name], o.covobs[name].cov): 1161 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1162 else: 1163 allcov[name] = o.covobs[name].cov 1164 1165 n_obs = len(raveled_data) 1166 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1167 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1168 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1169 1170 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1171 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1172 1173 if data.ndim == 1: 1174 values = np.array([o.value for o in data]) 1175 else: 1176 values = np.vectorize(lambda x: x.value)(data) 1177 1178 new_values = func(values, **kwargs) 1179 1180 multi = int(isinstance(new_values, np.ndarray)) 1181 1182 new_r_values = {} 1183 new_idl_d = {} 1184 for name in new_sample_names: 1185 idl = [] 1186 tmp_values = np.zeros(n_obs) 1187 for i, item in enumerate(raveled_data): 1188 tmp_values[i] = item.r_values.get(name, item.value) 1189 tmp_idl = item.idl.get(name) 1190 if tmp_idl is not None: 1191 idl.append(tmp_idl) 1192 if multi > 0: 1193 tmp_values = np.array(tmp_values).reshape(data.shape) 1194 new_r_values[name] = func(tmp_values, **kwargs) 1195 new_idl_d[name] = _merge_idx(idl) 1196 if not is_merged[name]: 1197 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1198 1199 if 'man_grad' in kwargs: 1200 deriv = np.asarray(kwargs.get('man_grad')) 1201 if new_values.shape + data.shape != deriv.shape: 1202 raise Exception('Manual derivative does not have correct shape.') 1203 elif kwargs.get('num_grad') is True: 1204 if multi > 0: 1205 raise Exception('Multi mode currently not supported for numerical derivative') 1206 options = { 1207 'base_step': 0.1, 1208 'step_ratio': 2.5} 1209 for key in options.keys(): 1210 kwarg = kwargs.get(key) 1211 if kwarg is not None: 1212 options[key] = kwarg 1213 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1214 if tmp_df.size == 1: 1215 deriv = np.array([tmp_df.real]) 1216 else: 1217 deriv = tmp_df.real 1218 else: 1219 deriv = jacobian(func)(values, **kwargs) 1220 1221 final_result = np.zeros(new_values.shape, dtype=object) 1222 1223 if array_mode is True: 1224 1225 class _Zero_grad(): 1226 def __init__(self, N): 1227 self.grad = np.zeros((N, 1)) 1228 1229 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1230 d_extracted = {} 1231 g_extracted = {} 1232 for name in new_sample_names: 1233 d_extracted[name] = [] 1234 ens_length = len(new_idl_d[name]) 1235 for i_dat, dat in enumerate(data): 1236 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1237 for name in new_cov_names: 1238 g_extracted[name] = [] 1239 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1240 for i_dat, dat in enumerate(data): 1241 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1242 1243 for i_val, new_val in np.ndenumerate(new_values): 1244 new_deltas = {} 1245 new_grad = {} 1246 if array_mode is True: 1247 for name in new_sample_names: 1248 ens_length = d_extracted[name][0].shape[-1] 1249 new_deltas[name] = np.zeros(ens_length) 1250 for i_dat, dat in enumerate(d_extracted[name]): 1251 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1252 for name in new_cov_names: 1253 new_grad[name] = 0 1254 for i_dat, dat in enumerate(g_extracted[name]): 1255 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1256 else: 1257 for j_obs, obs in np.ndenumerate(data): 1258 for name in obs.names: 1259 if name in obs.cov_names: 1260 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1261 else: 1262 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1263 1264 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1265 1266 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1267 raise Exception('The same name has been used for deltas and covobs!') 1268 new_samples = [] 1269 new_means = [] 1270 new_idl = [] 1271 new_names_obs = [] 1272 for name in new_names: 1273 if name not in new_covobs: 1274 if is_merged[name]: 1275 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1276 else: 1277 filtered_deltas = new_deltas[name] 1278 filtered_idl_d = new_idl_d[name] 1279 1280 new_samples.append(filtered_deltas) 1281 new_idl.append(filtered_idl_d) 1282 new_means.append(new_r_values[name][i_val]) 1283 new_names_obs.append(name) 1284 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1285 for name in new_covobs: 1286 final_result[i_val].names.append(name) 1287 final_result[i_val]._covobs = new_covobs 1288 final_result[i_val]._value = new_val 1289 final_result[i_val].is_merged = is_merged 1290 final_result[i_val].reweighted = reweighted 1291 1292 if multi == 0: 1293 final_result = final_result.item() 1294 1295 return final_result 1296 1297 1298def _reduce_deltas(deltas, idx_old, idx_new): 1299 """Extract deltas defined on idx_old on all configs of idx_new. 1300 1301 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1302 are ordered in an ascending order. 1303 1304 Parameters 1305 ---------- 1306 deltas : list 1307 List of fluctuations 1308 idx_old : list 1309 List or range of configs on which the deltas are defined 1310 idx_new : list 1311 List of configs for which we want to extract the deltas. 1312 Has to be a subset of idx_old. 1313 """ 1314 if not len(deltas) == len(idx_old): 1315 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1316 if type(idx_old) is range and type(idx_new) is range: 1317 if idx_old == idx_new: 1318 return deltas 1319 shape = len(idx_new) 1320 ret = np.zeros(shape) 1321 oldpos = 0 1322 for i in range(shape): 1323 pos = -1 1324 for j in range(oldpos, len(idx_old)): 1325 if idx_old[j] == idx_new[i]: 1326 pos = j 1327 break 1328 if pos < 0: 1329 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1330 ret[i] = deltas[pos] 1331 oldpos = pos 1332 return np.array(ret) 1333 1334 1335def reweight(weight, obs, **kwargs): 1336 """Reweight a list of observables. 1337 1338 Parameters 1339 ---------- 1340 weight : Obs 1341 Reweighting factor. An Observable that has to be defined on a superset of the 1342 configurations in obs[i].idl for all i. 1343 obs : list 1344 list of Obs, e.g. [obs1, obs2, obs3]. 1345 all_configs : bool 1346 if True, the reweighted observables are normalized by the average of 1347 the reweighting factor on all configurations in weight.idl and not 1348 on the configurations in obs[i].idl. Default False. 1349 """ 1350 result = [] 1351 for i in range(len(obs)): 1352 if len(obs[i].cov_names): 1353 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1354 if not set(obs[i].names).issubset(weight.names): 1355 raise Exception('Error: Ensembles do not fit') 1356 for name in obs[i].names: 1357 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1358 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1359 new_samples = [] 1360 w_deltas = {} 1361 for name in sorted(obs[i].names): 1362 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1363 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1364 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1365 1366 if kwargs.get('all_configs'): 1367 new_weight = weight 1368 else: 1369 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1370 1371 result.append(tmp_obs / new_weight) 1372 result[-1].reweighted = True 1373 result[-1].is_merged = obs[i].is_merged 1374 1375 return result 1376 1377 1378def correlate(obs_a, obs_b): 1379 """Correlate two observables. 1380 1381 Parameters 1382 ---------- 1383 obs_a : Obs 1384 First observable 1385 obs_b : Obs 1386 Second observable 1387 1388 Notes 1389 ----- 1390 Keep in mind to only correlate primary observables which have not been reweighted 1391 yet. The reweighting has to be applied after correlating the observables. 1392 Currently only works if ensembles are identical (this is not strictly necessary). 1393 """ 1394 1395 if sorted(obs_a.names) != sorted(obs_b.names): 1396 raise Exception('Ensembles do not fit') 1397 if len(obs_a.cov_names) or len(obs_b.cov_names): 1398 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1399 for name in obs_a.names: 1400 if obs_a.shape[name] != obs_b.shape[name]: 1401 raise Exception('Shapes of ensemble', name, 'do not fit') 1402 if obs_a.idl[name] != obs_b.idl[name]: 1403 raise Exception('idl of ensemble', name, 'do not fit') 1404 1405 if obs_a.reweighted is True: 1406 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1407 if obs_b.reweighted is True: 1408 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1409 1410 new_samples = [] 1411 new_idl = [] 1412 for name in sorted(obs_a.names): 1413 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1414 new_idl.append(obs_a.idl[name]) 1415 1416 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1417 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1418 o.reweighted = obs_a.reweighted or obs_b.reweighted 1419 return o 1420 1421 1422def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1423 r'''Calculates the error covariance matrix of a set of observables. 1424 1425 The gamma method has to be applied first to all observables. 1426 1427 Parameters 1428 ---------- 1429 obs : list or numpy.ndarray 1430 List or one dimensional array of Obs 1431 visualize : bool 1432 If True plots the corresponding normalized correlation matrix (default False). 1433 correlation : bool 1434 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1435 smooth : None or int 1436 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1437 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1438 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1439 small ones. 1440 1441 Notes 1442 ----- 1443 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1444 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1445 in the absence of autocorrelation. 1446 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1447 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1448 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1449 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1450 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1451 ''' 1452 1453 length = len(obs) 1454 1455 max_samples = np.max([o.N for o in obs]) 1456 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1457 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1458 1459 cov = np.zeros((length, length)) 1460 for i in range(length): 1461 for j in range(i, length): 1462 cov[i, j] = _covariance_element(obs[i], obs[j]) 1463 cov = cov + cov.T - np.diag(np.diag(cov)) 1464 1465 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1466 1467 if isinstance(smooth, int): 1468 corr = _smooth_eigenvalues(corr, smooth) 1469 1470 if visualize: 1471 plt.matshow(corr, vmin=-1, vmax=1) 1472 plt.set_cmap('RdBu') 1473 plt.colorbar() 1474 plt.draw() 1475 1476 if correlation is True: 1477 return corr 1478 1479 errors = [o.dvalue for o in obs] 1480 cov = np.diag(errors) @ corr @ np.diag(errors) 1481 1482 eigenvalues = np.linalg.eigh(cov)[0] 1483 if not np.all(eigenvalues >= 0): 1484 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1485 1486 return cov 1487 1488 1489def _smooth_eigenvalues(corr, E): 1490 """Eigenvalue smoothing as described in hep-lat/9412087 1491 1492 corr : np.ndarray 1493 correlation matrix 1494 E : integer 1495 Number of eigenvalues to be left substantially unchanged 1496 """ 1497 if not (2 < E < corr.shape[0] - 1): 1498 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1499 vals, vec = np.linalg.eigh(corr) 1500 lambda_min = np.mean(vals[:-E]) 1501 vals[vals < lambda_min] = lambda_min 1502 vals /= np.mean(vals) 1503 return vec @ np.diag(vals) @ vec.T 1504 1505 1506def _covariance_element(obs1, obs2): 1507 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1508 1509 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1510 deltas1 = _collapse_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1511 deltas2 = _collapse_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1512 return np.sum(deltas1 * deltas2) 1513 1514 if set(obs1.names).isdisjoint(set(obs2.names)): 1515 return 0.0 1516 1517 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1518 raise Exception('The gamma method has to be applied to both Obs first.') 1519 1520 dvalue = 0.0 1521 1522 for e_name in obs1.mc_names: 1523 1524 if e_name not in obs2.mc_names: 1525 continue 1526 1527 idl_d = {} 1528 for r_name in obs1.e_content[e_name]: 1529 if r_name not in obs2.e_content[e_name]: 1530 continue 1531 idl_d[r_name] = _intersection_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1532 1533 gamma = 0.0 1534 1535 for r_name in obs1.e_content[e_name]: 1536 if r_name not in obs2.e_content[e_name]: 1537 continue 1538 if len(idl_d[r_name]) == 0: 1539 continue 1540 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1541 1542 if gamma == 0.0: 1543 continue 1544 1545 gamma_div = 0.0 1546 for r_name in obs1.e_content[e_name]: 1547 if r_name not in obs2.e_content[e_name]: 1548 continue 1549 if len(idl_d[r_name]) == 0: 1550 continue 1551 gamma_div += np.sqrt(calc_gamma(obs1.deltas[r_name], obs1.deltas[r_name], obs1.idl[r_name], obs1.idl[r_name], idl_d[r_name]) * calc_gamma(obs2.deltas[r_name], obs2.deltas[r_name], obs2.idl[r_name], obs2.idl[r_name], idl_d[r_name])) 1552 gamma /= gamma_div 1553 1554 dvalue += gamma 1555 1556 for e_name in obs1.cov_names: 1557 1558 if e_name not in obs2.cov_names: 1559 continue 1560 1561 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1562 1563 return dvalue 1564 1565 1566def import_jackknife(jacks, name, idl=None): 1567 """Imports jackknife samples and returns an Obs 1568 1569 Parameters 1570 ---------- 1571 jacks : numpy.ndarray 1572 numpy array containing the mean value as zeroth entry and 1573 the N jackknife samples as first to Nth entry. 1574 name : str 1575 name of the ensemble the samples are defined on. 1576 """ 1577 length = len(jacks) - 1 1578 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1579 samples = jacks[1:] @ prj 1580 mean = np.mean(samples) 1581 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1582 new_obs._value = jacks[0] 1583 return new_obs 1584 1585 1586def merge_obs(list_of_obs): 1587 """Combine all observables in list_of_obs into one new observable 1588 1589 Parameters 1590 ---------- 1591 list_of_obs : list 1592 list of the Obs object to be combined 1593 1594 Notes 1595 ----- 1596 It is not possible to combine obs which are based on the same replicum 1597 """ 1598 replist = [item for obs in list_of_obs for item in obs.names] 1599 if (len(replist) == len(set(replist))) is False: 1600 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1601 if any([len(o.cov_names) for o in list_of_obs]): 1602 raise Exception('Not possible to merge data that contains covobs!') 1603 new_dict = {} 1604 idl_dict = {} 1605 for o in list_of_obs: 1606 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1607 for key in set(o.deltas) | set(o.r_values)}) 1608 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1609 1610 names = sorted(new_dict.keys()) 1611 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1612 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1613 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1614 return o 1615 1616 1617def cov_Obs(means, cov, name, grad=None): 1618 """Create an Obs based on mean(s) and a covariance matrix 1619 1620 Parameters 1621 ---------- 1622 mean : list of floats or float 1623 N mean value(s) of the new Obs 1624 cov : list or array 1625 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1626 name : str 1627 identifier for the covariance matrix 1628 grad : list or array 1629 Gradient of the Covobs wrt. the means belonging to cov. 1630 """ 1631 1632 def covobs_to_obs(co): 1633 """Make an Obs out of a Covobs 1634 1635 Parameters 1636 ---------- 1637 co : Covobs 1638 Covobs to be embedded into the Obs 1639 """ 1640 o = Obs([], [], means=[]) 1641 o._value = co.value 1642 o.names.append(co.name) 1643 o._covobs[co.name] = co 1644 o._dvalue = np.sqrt(co.errsq()) 1645 return o 1646 1647 ol = [] 1648 if isinstance(means, (float, int)): 1649 means = [means] 1650 1651 for i in range(len(means)): 1652 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1653 if ol[0].covobs[name].N != len(means): 1654 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1655 if len(ol) == 1: 1656 return ol[0] 1657 return ol
16class Obs: 17 """Class for a general observable. 18 19 Instances of Obs are the basic objects of a pyerrors error analysis. 20 They are initialized with a list which contains arrays of samples for 21 different ensembles/replica and another list of same length which contains 22 the names of the ensembles/replica. Mathematical operations can be 23 performed on instances. The result is another instance of Obs. The error of 24 an instance can be computed with the gamma_method. Also contains additional 25 methods for output and visualization of the error calculation. 26 27 Attributes 28 ---------- 29 S_global : float 30 Standard value for S (default 2.0) 31 S_dict : dict 32 Dictionary for S values. If an entry for a given ensemble 33 exists this overwrites the standard value for that ensemble. 34 tau_exp_global : float 35 Standard value for tau_exp (default 0.0) 36 tau_exp_dict : dict 37 Dictionary for tau_exp values. If an entry for a given ensemble exists 38 this overwrites the standard value for that ensemble. 39 N_sigma_global : float 40 Standard value for N_sigma (default 1.0) 41 N_sigma_dict : dict 42 Dictionary for N_sigma values. If an entry for a given ensemble exists 43 this overwrites the standard value for that ensemble. 44 """ 45 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 46 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 47 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 48 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 49 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 50 51 S_global = 2.0 52 S_dict = {} 53 tau_exp_global = 0.0 54 tau_exp_dict = {} 55 N_sigma_global = 1.0 56 N_sigma_dict = {} 57 filter_eps = 1e-10 58 59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None 140 141 @property 142 def value(self): 143 return self._value 144 145 @property 146 def dvalue(self): 147 return self._dvalue 148 149 @property 150 def e_names(self): 151 return sorted(set([o.split('|')[0] for o in self.names])) 152 153 @property 154 def cov_names(self): 155 return sorted(set([o for o in self.covobs.keys()])) 156 157 @property 158 def mc_names(self): 159 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 160 161 @property 162 def e_content(self): 163 res = {} 164 for e, e_name in enumerate(self.e_names): 165 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 166 if e_name in self.names: 167 res[e_name].append(e_name) 168 return res 169 170 @property 171 def covobs(self): 172 return self._covobs 173 174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 271 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 272 # Make sure no entry of tauint is smaller than 0.5 273 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 274 # hep-lat/0306017 eq. (42) 275 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 276 self.e_n_dtauint[e_name][0] = 0.0 277 278 def _compute_drho(i): 279 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 280 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 281 282 _compute_drho(1) 283 if self.tau_exp[e_name] > 0: 284 texp = self.tau_exp[e_name] 285 # Critical slowing down analysis 286 if w_max // 2 <= 1: 287 raise Exception("Need at least 8 samples for tau_exp error analysis") 288 for n in range(1, w_max // 2): 289 _compute_drho(n + 1) 290 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 291 # Bias correction hep-lat/0306017 eq. (49) included 292 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 293 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 294 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 295 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 296 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 297 self.e_windowsize[e_name] = n 298 break 299 else: 300 if self.S[e_name] == 0.0: 301 self.e_tauint[e_name] = 0.5 302 self.e_dtauint[e_name] = 0.0 303 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 305 self.e_windowsize[e_name] = 0 306 else: 307 # Standard automatic windowing procedure 308 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 309 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 310 for n in range(1, w_max): 311 if n < w_max // 2 - 2: 312 _compute_drho(n + 1) 313 if g_w[n - 1] < 0 or n >= w_max - 1: 314 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 315 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 316 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 318 self.e_windowsize[e_name] = n 319 break 320 321 self._dvalue += self.e_dvalue[e_name] ** 2 322 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 323 324 for e_name in self.cov_names: 325 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 326 self.e_ddvalue[e_name] = 0 327 self._dvalue += self.e_dvalue[e_name]**2 328 329 self._dvalue = np.sqrt(self._dvalue) 330 if self._dvalue == 0.0: 331 self.ddvalue = 0.0 332 else: 333 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 334 return 335 336 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 337 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 338 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 339 340 Parameters 341 ---------- 342 deltas : list 343 List of fluctuations 344 idx : list 345 List or range of configurations on which the deltas are defined. 346 shape : int 347 Number of configurations in idx. 348 w_max : int 349 Upper bound for the summation window. 350 fft : bool 351 determines whether the fft algorithm is used for the computation 352 of the autocorrelation function. 353 """ 354 gamma = np.zeros(w_max) 355 deltas = _expand_deltas(deltas, idx, shape) 356 new_shape = len(deltas) 357 if fft: 358 max_gamma = min(new_shape, w_max) 359 # The padding for the fft has to be even 360 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 361 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 362 else: 363 for n in range(w_max): 364 if new_shape - n >= 0: 365 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 366 367 return gamma 368 369 def details(self, ens_content=True): 370 """Output detailed properties of the Obs. 371 372 Parameters 373 ---------- 374 ens_content : bool 375 print details about the ensembles and replica if true. 376 """ 377 if self.tag is not None: 378 print("Description:", self.tag) 379 if not hasattr(self, 'e_dvalue'): 380 print('Result\t %3.8e' % (self.value)) 381 else: 382 if self.value == 0.0: 383 percentage = np.nan 384 else: 385 percentage = np.abs(self._dvalue / self.value) * 100 386 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 387 if len(self.e_names) > 1: 388 print(' Ensemble errors:') 389 for e_name in self.mc_names: 390 if len(self.e_names) > 1: 391 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 392 if self.tau_exp[e_name] > 0: 393 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 394 else: 395 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 396 for e_name in self.cov_names: 397 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 398 if ens_content is True: 399 if len(self.e_names) == 1: 400 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 401 else: 402 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 403 my_string_list = [] 404 for key, value in sorted(self.e_content.items()): 405 if key not in self.covobs: 406 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 407 if len(value) == 1: 408 my_string += f': {self.shape[value[0]]} configurations' 409 if isinstance(self.idl[value[0]], range): 410 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 411 else: 412 my_string += ' (irregular range)' 413 else: 414 sublist = [] 415 for v in value: 416 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 417 my_substring += f': {self.shape[v]} configurations' 418 if isinstance(self.idl[v], range): 419 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 420 else: 421 my_substring += ' (irregular range)' 422 sublist.append(my_substring) 423 424 my_string += '\n' + '\n'.join(sublist) 425 else: 426 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 427 my_string_list.append(my_string) 428 print('\n'.join(my_string_list)) 429 430 def reweight(self, weight): 431 """Reweight the obs with given rewighting factors. 432 433 Parameters 434 ---------- 435 weight : Obs 436 Reweighting factor. An Observable that has to be defined on a superset of the 437 configurations in obs[i].idl for all i. 438 all_configs : bool 439 if True, the reweighted observables are normalized by the average of 440 the reweighting factor on all configurations in weight.idl and not 441 on the configurations in obs[i].idl. Default False. 442 """ 443 return reweight(weight, [self])[0] 444 445 def is_zero_within_error(self, sigma=1): 446 """Checks whether the observable is zero within 'sigma' standard errors. 447 448 Parameters 449 ---------- 450 sigma : int 451 Number of standard errors used for the check. 452 453 Works only properly when the gamma method was run. 454 """ 455 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 456 457 def is_zero(self, atol=1e-10): 458 """Checks whether the observable is zero within a given tolerance. 459 460 Parameters 461 ---------- 462 atol : float 463 Absolute tolerance (for details see numpy documentation). 464 """ 465 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 466 467 def plot_tauint(self, save=None): 468 """Plot integrated autocorrelation time for each ensemble. 469 470 Parameters 471 ---------- 472 save : str 473 saves the figure to a file named 'save' if. 474 """ 475 if not hasattr(self, 'e_dvalue'): 476 raise Exception('Run the gamma method first.') 477 478 for e, e_name in enumerate(self.mc_names): 479 fig = plt.figure() 480 plt.xlabel(r'$W$') 481 plt.ylabel(r'$\tau_\mathrm{int}$') 482 length = int(len(self.e_n_tauint[e_name])) 483 if self.tau_exp[e_name] > 0: 484 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 485 x_help = np.arange(2 * self.tau_exp[e_name]) 486 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 487 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 488 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 489 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 490 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 491 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 492 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 493 else: 494 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 495 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 496 497 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 498 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 499 plt.legend() 500 plt.xlim(-0.5, xmax) 501 ylim = plt.ylim() 502 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 503 plt.draw() 504 if save: 505 fig.savefig(save + "_" + str(e)) 506 507 def plot_rho(self, save=None): 508 """Plot normalized autocorrelation function time for each ensemble. 509 510 Parameters 511 ---------- 512 save : str 513 saves the figure to a file named 'save' if. 514 """ 515 if not hasattr(self, 'e_dvalue'): 516 raise Exception('Run the gamma method first.') 517 for e, e_name in enumerate(self.mc_names): 518 fig = plt.figure() 519 plt.xlabel('W') 520 plt.ylabel('rho') 521 length = int(len(self.e_drho[e_name])) 522 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 523 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 524 if self.tau_exp[e_name] > 0: 525 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 526 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 527 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 528 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 529 else: 530 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 531 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 532 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 533 plt.xlim(-0.5, xmax) 534 plt.draw() 535 if save: 536 fig.savefig(save + "_" + str(e)) 537 538 def plot_rep_dist(self): 539 """Plot replica distribution for each ensemble with more than one replicum.""" 540 if not hasattr(self, 'e_dvalue'): 541 raise Exception('Run the gamma method first.') 542 for e, e_name in enumerate(self.mc_names): 543 if len(self.e_content[e_name]) == 1: 544 print('No replica distribution for a single replicum (', e_name, ')') 545 continue 546 r_length = [] 547 sub_r_mean = 0 548 for r, r_name in enumerate(self.e_content[e_name]): 549 r_length.append(len(self.deltas[r_name])) 550 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 551 e_N = np.sum(r_length) 552 sub_r_mean /= e_N 553 arr = np.zeros(len(self.e_content[e_name])) 554 for r, r_name in enumerate(self.e_content[e_name]): 555 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 556 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 557 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 558 plt.draw() 559 560 def plot_history(self, expand=True): 561 """Plot derived Monte Carlo history for each ensemble 562 563 Parameters 564 ---------- 565 expand : bool 566 show expanded history for irregular Monte Carlo chains (default: True). 567 """ 568 for e, e_name in enumerate(self.mc_names): 569 plt.figure() 570 r_length = [] 571 tmp = [] 572 tmp_expanded = [] 573 for r, r_name in enumerate(self.e_content[e_name]): 574 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 575 if expand: 576 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 577 r_length.append(len(tmp_expanded[-1])) 578 else: 579 r_length.append(len(tmp[-1])) 580 e_N = np.sum(r_length) 581 x = np.arange(e_N) 582 y_test = np.concatenate(tmp, axis=0) 583 if expand: 584 y = np.concatenate(tmp_expanded, axis=0) 585 else: 586 y = y_test 587 plt.errorbar(x, y, fmt='.', markersize=3) 588 plt.xlim(-0.5, e_N - 0.5) 589 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 590 plt.draw() 591 592 def plot_piechart(self, save=None): 593 """Plot piechart which shows the fractional contribution of each 594 ensemble to the error and returns a dictionary containing the fractions. 595 596 Parameters 597 ---------- 598 save : str 599 saves the figure to a file named 'save' if. 600 """ 601 if not hasattr(self, 'e_dvalue'): 602 raise Exception('Run the gamma method first.') 603 if np.isclose(0.0, self._dvalue, atol=1e-15): 604 raise Exception('Error is 0.0') 605 labels = self.e_names 606 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 607 fig1, ax1 = plt.subplots() 608 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 609 ax1.axis('equal') 610 plt.draw() 611 if save: 612 fig1.savefig(save) 613 614 return dict(zip(self.e_names, sizes)) 615 616 def dump(self, filename, datatype="json.gz", description="", **kwargs): 617 """Dump the Obs to a file 'name' of chosen format. 618 619 Parameters 620 ---------- 621 filename : str 622 name of the file to be saved. 623 datatype : str 624 Format of the exported file. Supported formats include 625 "json.gz" and "pickle" 626 description : str 627 Description for output file, only relevant for json.gz format. 628 path : str 629 specifies a custom path for the file (default '.') 630 """ 631 if 'path' in kwargs: 632 file_name = kwargs.get('path') + '/' + filename 633 else: 634 file_name = filename 635 636 if datatype == "json.gz": 637 from .input.json import dump_to_json 638 dump_to_json([self], file_name, description=description) 639 elif datatype == "pickle": 640 with open(file_name + '.p', 'wb') as fb: 641 pickle.dump(self, fb) 642 else: 643 raise Exception("Unknown datatype " + str(datatype)) 644 645 def export_jackknife(self): 646 """Export jackknife samples from the Obs 647 648 Returns 649 ------- 650 numpy.ndarray 651 Returns a numpy array of length N + 1 where N is the number of samples 652 for the given ensemble and replicum. The zeroth entry of the array contains 653 the mean value of the Obs, entries 1 to N contain the N jackknife samples 654 derived from the Obs. The current implementation only works for observables 655 defined on exactly one ensemble and replicum. The derived jackknife samples 656 should agree with samples from a full jackknife analysis up to O(1/N). 657 """ 658 659 if len(self.names) != 1: 660 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 661 662 name = self.names[0] 663 full_data = self.deltas[name] + self.r_values[name] 664 n = full_data.size 665 mean = self.value 666 tmp_jacks = np.zeros(n + 1) 667 tmp_jacks[0] = mean 668 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 669 return tmp_jacks 670 671 def __float__(self): 672 return float(self.value) 673 674 def __repr__(self): 675 return 'Obs[' + str(self) + ']' 676 677 def __str__(self): 678 if self._dvalue == 0.0: 679 return str(self.value) 680 fexp = np.floor(np.log10(self._dvalue)) 681 if fexp < 0.0: 682 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 683 elif fexp == 0.0: 684 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 685 else: 686 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 687 688 # Overload comparisons 689 def __lt__(self, other): 690 return self.value < other 691 692 def __le__(self, other): 693 return self.value <= other 694 695 def __gt__(self, other): 696 return self.value > other 697 698 def __ge__(self, other): 699 return self.value >= other 700 701 def __eq__(self, other): 702 return (self - other).is_zero() 703 704 def __ne__(self, other): 705 return not (self - other).is_zero() 706 707 # Overload math operations 708 def __add__(self, y): 709 if isinstance(y, Obs): 710 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 711 else: 712 if isinstance(y, np.ndarray): 713 return np.array([self + o for o in y]) 714 elif y.__class__.__name__ in ['Corr', 'CObs']: 715 return NotImplemented 716 else: 717 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 718 719 def __radd__(self, y): 720 return self + y 721 722 def __mul__(self, y): 723 if isinstance(y, Obs): 724 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 725 else: 726 if isinstance(y, np.ndarray): 727 return np.array([self * o for o in y]) 728 elif isinstance(y, complex): 729 return CObs(self * y.real, self * y.imag) 730 elif y.__class__.__name__ in ['Corr', 'CObs']: 731 return NotImplemented 732 else: 733 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 734 735 def __rmul__(self, y): 736 return self * y 737 738 def __sub__(self, y): 739 if isinstance(y, Obs): 740 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 741 else: 742 if isinstance(y, np.ndarray): 743 return np.array([self - o for o in y]) 744 elif y.__class__.__name__ in ['Corr', 'CObs']: 745 return NotImplemented 746 else: 747 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 748 749 def __rsub__(self, y): 750 return -1 * (self - y) 751 752 def __pos__(self): 753 return self 754 755 def __neg__(self): 756 return -1 * self 757 758 def __truediv__(self, y): 759 if isinstance(y, Obs): 760 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 761 else: 762 if isinstance(y, np.ndarray): 763 return np.array([self / o for o in y]) 764 elif y.__class__.__name__ in ['Corr', 'CObs']: 765 return NotImplemented 766 else: 767 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 768 769 def __rtruediv__(self, y): 770 if isinstance(y, Obs): 771 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 772 else: 773 if isinstance(y, np.ndarray): 774 return np.array([o / self for o in y]) 775 elif y.__class__.__name__ in ['Corr', 'CObs']: 776 return NotImplemented 777 else: 778 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 779 780 def __pow__(self, y): 781 if isinstance(y, Obs): 782 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 783 else: 784 return derived_observable(lambda x: x[0] ** y, [self]) 785 786 def __rpow__(self, y): 787 if isinstance(y, Obs): 788 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 789 else: 790 return derived_observable(lambda x: y ** x[0], [self]) 791 792 def __abs__(self): 793 return derived_observable(lambda x: anp.abs(x[0]), [self]) 794 795 # Overload numpy functions 796 def sqrt(self): 797 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 798 799 def log(self): 800 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 801 802 def exp(self): 803 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 804 805 def sin(self): 806 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 807 808 def cos(self): 809 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 810 811 def tan(self): 812 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 813 814 def arcsin(self): 815 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 816 817 def arccos(self): 818 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 819 820 def arctan(self): 821 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 822 823 def sinh(self): 824 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 825 826 def cosh(self): 827 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 828 829 def tanh(self): 830 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 831 832 def arcsinh(self): 833 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 834 835 def arccosh(self): 836 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 837 838 def arctanh(self): 839 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
59 def __init__(self, samples, names, idl=None, **kwargs): 60 """ Initialize Obs object. 61 62 Parameters 63 ---------- 64 samples : list 65 list of numpy arrays containing the Monte Carlo samples 66 names : list 67 list of strings labeling the individual samples 68 idl : list, optional 69 list of ranges or lists on which the samples are defined 70 """ 71 72 if kwargs.get("means") is None and len(samples): 73 if len(samples) != len(names): 74 raise Exception('Length of samples and names incompatible.') 75 if idl is not None: 76 if len(idl) != len(names): 77 raise Exception('Length of idl incompatible with samples and names.') 78 name_length = len(names) 79 if name_length > 1: 80 if name_length != len(set(names)): 81 raise Exception('names are not unique.') 82 if not all(isinstance(x, str) for x in names): 83 raise TypeError('All names have to be strings.') 84 else: 85 if not isinstance(names[0], str): 86 raise TypeError('All names have to be strings.') 87 if min(len(x) for x in samples) <= 4: 88 raise Exception('Samples have to have at least 5 entries.') 89 90 self.names = sorted(names) 91 self.shape = {} 92 self.r_values = {} 93 self.deltas = {} 94 self._covobs = {} 95 96 self._value = 0 97 self.N = 0 98 self.is_merged = {} 99 self.idl = {} 100 if idl is not None: 101 for name, idx in sorted(zip(names, idl)): 102 if isinstance(idx, range): 103 self.idl[name] = idx 104 elif isinstance(idx, (list, np.ndarray)): 105 dc = np.unique(np.diff(idx)) 106 if np.any(dc < 0): 107 raise Exception("Unsorted idx for idl[%s]" % (name)) 108 if len(dc) == 1: 109 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 110 else: 111 self.idl[name] = list(idx) 112 else: 113 raise Exception('incompatible type for idl[%s].' % (name)) 114 else: 115 for name, sample in sorted(zip(names, samples)): 116 self.idl[name] = range(1, len(sample) + 1) 117 118 if kwargs.get("means") is not None: 119 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 120 self.shape[name] = len(self.idl[name]) 121 self.N += self.shape[name] 122 self.r_values[name] = mean 123 self.deltas[name] = sample 124 else: 125 for name, sample in sorted(zip(names, samples)): 126 self.shape[name] = len(self.idl[name]) 127 self.N += self.shape[name] 128 if len(sample) != self.shape[name]: 129 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 130 self.r_values[name] = np.mean(sample) 131 self.deltas[name] = sample - self.r_values[name] 132 self._value += self.shape[name] * self.r_values[name] 133 self._value /= self.N 134 135 self._dvalue = 0.0 136 self.ddvalue = 0.0 137 self.reweighted = False 138 139 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
174 def gamma_method(self, **kwargs): 175 """Estimate the error and related properties of the Obs. 176 177 Parameters 178 ---------- 179 S : float 180 specifies a custom value for the parameter S (default 2.0). 181 If set to 0 it is assumed that the data exhibits no 182 autocorrelation. In this case the error estimates coincides 183 with the sample standard error. 184 tau_exp : float 185 positive value triggers the critical slowing down analysis 186 (default 0.0). 187 N_sigma : float 188 number of standard deviations from zero until the tail is 189 attached to the autocorrelation function (default 1). 190 fft : bool 191 determines whether the fft algorithm is used for the computation 192 of the autocorrelation function (default True) 193 """ 194 195 e_content = self.e_content 196 self.e_dvalue = {} 197 self.e_ddvalue = {} 198 self.e_tauint = {} 199 self.e_dtauint = {} 200 self.e_windowsize = {} 201 self.e_n_tauint = {} 202 self.e_n_dtauint = {} 203 e_gamma = {} 204 self.e_rho = {} 205 self.e_drho = {} 206 self._dvalue = 0 207 self.ddvalue = 0 208 209 self.S = {} 210 self.tau_exp = {} 211 self.N_sigma = {} 212 213 if kwargs.get('fft') is False: 214 fft = False 215 else: 216 fft = True 217 218 def _parse_kwarg(kwarg_name): 219 if kwarg_name in kwargs: 220 tmp = kwargs.get(kwarg_name) 221 if isinstance(tmp, (int, float)): 222 if tmp < 0: 223 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 224 for e, e_name in enumerate(self.e_names): 225 getattr(self, kwarg_name)[e_name] = tmp 226 else: 227 raise TypeError(kwarg_name + ' is not in proper format.') 228 else: 229 for e, e_name in enumerate(self.e_names): 230 if e_name in getattr(Obs, kwarg_name + '_dict'): 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 232 else: 233 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 234 235 _parse_kwarg('S') 236 _parse_kwarg('tau_exp') 237 _parse_kwarg('N_sigma') 238 239 for e, e_name in enumerate(self.mc_names): 240 r_length = [] 241 for r_name in e_content[e_name]: 242 if isinstance(self.idl[r_name], range): 243 r_length.append(len(self.idl[r_name])) 244 else: 245 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 246 247 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 248 w_max = max(r_length) // 2 249 e_gamma[e_name] = np.zeros(w_max) 250 self.e_rho[e_name] = np.zeros(w_max) 251 self.e_drho[e_name] = np.zeros(w_max) 252 253 for r_name in e_content[e_name]: 254 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 255 256 gamma_div = np.zeros(w_max) 257 for r_name in e_content[e_name]: 258 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 259 gamma_div[gamma_div < 1] = 1.0 260 e_gamma[e_name] /= gamma_div[:w_max] 261 262 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 263 self.e_tauint[e_name] = 0.5 264 self.e_dtauint[e_name] = 0.0 265 self.e_dvalue[e_name] = 0.0 266 self.e_ddvalue[e_name] = 0.0 267 self.e_windowsize[e_name] = 0 268 continue 269 270 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 271 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 272 # Make sure no entry of tauint is smaller than 0.5 273 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 274 # hep-lat/0306017 eq. (42) 275 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 276 self.e_n_dtauint[e_name][0] = 0.0 277 278 def _compute_drho(i): 279 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 280 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 281 282 _compute_drho(1) 283 if self.tau_exp[e_name] > 0: 284 texp = self.tau_exp[e_name] 285 # Critical slowing down analysis 286 if w_max // 2 <= 1: 287 raise Exception("Need at least 8 samples for tau_exp error analysis") 288 for n in range(1, w_max // 2): 289 _compute_drho(n + 1) 290 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 291 # Bias correction hep-lat/0306017 eq. (49) included 292 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 293 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 294 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 295 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 296 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 297 self.e_windowsize[e_name] = n 298 break 299 else: 300 if self.S[e_name] == 0.0: 301 self.e_tauint[e_name] = 0.5 302 self.e_dtauint[e_name] = 0.0 303 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 304 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 305 self.e_windowsize[e_name] = 0 306 else: 307 # Standard automatic windowing procedure 308 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 309 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 310 for n in range(1, w_max): 311 if n < w_max // 2 - 2: 312 _compute_drho(n + 1) 313 if g_w[n - 1] < 0 or n >= w_max - 1: 314 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 315 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 316 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 317 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 318 self.e_windowsize[e_name] = n 319 break 320 321 self._dvalue += self.e_dvalue[e_name] ** 2 322 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 323 324 for e_name in self.cov_names: 325 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 326 self.e_ddvalue[e_name] = 0 327 self._dvalue += self.e_dvalue[e_name]**2 328 329 self._dvalue = np.sqrt(self._dvalue) 330 if self._dvalue == 0.0: 331 self.ddvalue = 0.0 332 else: 333 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 334 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
369 def details(self, ens_content=True): 370 """Output detailed properties of the Obs. 371 372 Parameters 373 ---------- 374 ens_content : bool 375 print details about the ensembles and replica if true. 376 """ 377 if self.tag is not None: 378 print("Description:", self.tag) 379 if not hasattr(self, 'e_dvalue'): 380 print('Result\t %3.8e' % (self.value)) 381 else: 382 if self.value == 0.0: 383 percentage = np.nan 384 else: 385 percentage = np.abs(self._dvalue / self.value) * 100 386 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 387 if len(self.e_names) > 1: 388 print(' Ensemble errors:') 389 for e_name in self.mc_names: 390 if len(self.e_names) > 1: 391 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 392 if self.tau_exp[e_name] > 0: 393 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 394 else: 395 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 396 for e_name in self.cov_names: 397 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 398 if ens_content is True: 399 if len(self.e_names) == 1: 400 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 401 else: 402 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 403 my_string_list = [] 404 for key, value in sorted(self.e_content.items()): 405 if key not in self.covobs: 406 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 407 if len(value) == 1: 408 my_string += f': {self.shape[value[0]]} configurations' 409 if isinstance(self.idl[value[0]], range): 410 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 411 else: 412 my_string += ' (irregular range)' 413 else: 414 sublist = [] 415 for v in value: 416 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 417 my_substring += f': {self.shape[v]} configurations' 418 if isinstance(self.idl[v], range): 419 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 420 else: 421 my_substring += ' (irregular range)' 422 sublist.append(my_substring) 423 424 my_string += '\n' + '\n'.join(sublist) 425 else: 426 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 427 my_string_list.append(my_string) 428 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
430 def reweight(self, weight): 431 """Reweight the obs with given rewighting factors. 432 433 Parameters 434 ---------- 435 weight : Obs 436 Reweighting factor. An Observable that has to be defined on a superset of the 437 configurations in obs[i].idl for all i. 438 all_configs : bool 439 if True, the reweighted observables are normalized by the average of 440 the reweighting factor on all configurations in weight.idl and not 441 on the configurations in obs[i].idl. Default False. 442 """ 443 return reweight(weight, [self])[0]
Reweight the obs with given rewighting factors.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
445 def is_zero_within_error(self, sigma=1): 446 """Checks whether the observable is zero within 'sigma' standard errors. 447 448 Parameters 449 ---------- 450 sigma : int 451 Number of standard errors used for the check. 452 453 Works only properly when the gamma method was run. 454 """ 455 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
457 def is_zero(self, atol=1e-10): 458 """Checks whether the observable is zero within a given tolerance. 459 460 Parameters 461 ---------- 462 atol : float 463 Absolute tolerance (for details see numpy documentation). 464 """ 465 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
467 def plot_tauint(self, save=None): 468 """Plot integrated autocorrelation time for each ensemble. 469 470 Parameters 471 ---------- 472 save : str 473 saves the figure to a file named 'save' if. 474 """ 475 if not hasattr(self, 'e_dvalue'): 476 raise Exception('Run the gamma method first.') 477 478 for e, e_name in enumerate(self.mc_names): 479 fig = plt.figure() 480 plt.xlabel(r'$W$') 481 plt.ylabel(r'$\tau_\mathrm{int}$') 482 length = int(len(self.e_n_tauint[e_name])) 483 if self.tau_exp[e_name] > 0: 484 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 485 x_help = np.arange(2 * self.tau_exp[e_name]) 486 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 487 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 488 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 489 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 490 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 491 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 492 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 493 else: 494 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 495 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 496 497 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 498 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 499 plt.legend() 500 plt.xlim(-0.5, xmax) 501 ylim = plt.ylim() 502 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 503 plt.draw() 504 if save: 505 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
507 def plot_rho(self, save=None): 508 """Plot normalized autocorrelation function time for each ensemble. 509 510 Parameters 511 ---------- 512 save : str 513 saves the figure to a file named 'save' if. 514 """ 515 if not hasattr(self, 'e_dvalue'): 516 raise Exception('Run the gamma method first.') 517 for e, e_name in enumerate(self.mc_names): 518 fig = plt.figure() 519 plt.xlabel('W') 520 plt.ylabel('rho') 521 length = int(len(self.e_drho[e_name])) 522 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 523 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 524 if self.tau_exp[e_name] > 0: 525 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 526 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 527 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 528 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 529 else: 530 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 531 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 532 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 533 plt.xlim(-0.5, xmax) 534 plt.draw() 535 if save: 536 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
538 def plot_rep_dist(self): 539 """Plot replica distribution for each ensemble with more than one replicum.""" 540 if not hasattr(self, 'e_dvalue'): 541 raise Exception('Run the gamma method first.') 542 for e, e_name in enumerate(self.mc_names): 543 if len(self.e_content[e_name]) == 1: 544 print('No replica distribution for a single replicum (', e_name, ')') 545 continue 546 r_length = [] 547 sub_r_mean = 0 548 for r, r_name in enumerate(self.e_content[e_name]): 549 r_length.append(len(self.deltas[r_name])) 550 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 551 e_N = np.sum(r_length) 552 sub_r_mean /= e_N 553 arr = np.zeros(len(self.e_content[e_name])) 554 for r, r_name in enumerate(self.e_content[e_name]): 555 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 556 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 557 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 558 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
560 def plot_history(self, expand=True): 561 """Plot derived Monte Carlo history for each ensemble 562 563 Parameters 564 ---------- 565 expand : bool 566 show expanded history for irregular Monte Carlo chains (default: True). 567 """ 568 for e, e_name in enumerate(self.mc_names): 569 plt.figure() 570 r_length = [] 571 tmp = [] 572 tmp_expanded = [] 573 for r, r_name in enumerate(self.e_content[e_name]): 574 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 575 if expand: 576 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 577 r_length.append(len(tmp_expanded[-1])) 578 else: 579 r_length.append(len(tmp[-1])) 580 e_N = np.sum(r_length) 581 x = np.arange(e_N) 582 y_test = np.concatenate(tmp, axis=0) 583 if expand: 584 y = np.concatenate(tmp_expanded, axis=0) 585 else: 586 y = y_test 587 plt.errorbar(x, y, fmt='.', markersize=3) 588 plt.xlim(-0.5, e_N - 0.5) 589 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 590 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
592 def plot_piechart(self, save=None): 593 """Plot piechart which shows the fractional contribution of each 594 ensemble to the error and returns a dictionary containing the fractions. 595 596 Parameters 597 ---------- 598 save : str 599 saves the figure to a file named 'save' if. 600 """ 601 if not hasattr(self, 'e_dvalue'): 602 raise Exception('Run the gamma method first.') 603 if np.isclose(0.0, self._dvalue, atol=1e-15): 604 raise Exception('Error is 0.0') 605 labels = self.e_names 606 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 607 fig1, ax1 = plt.subplots() 608 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 609 ax1.axis('equal') 610 plt.draw() 611 if save: 612 fig1.savefig(save) 613 614 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
616 def dump(self, filename, datatype="json.gz", description="", **kwargs): 617 """Dump the Obs to a file 'name' of chosen format. 618 619 Parameters 620 ---------- 621 filename : str 622 name of the file to be saved. 623 datatype : str 624 Format of the exported file. Supported formats include 625 "json.gz" and "pickle" 626 description : str 627 Description for output file, only relevant for json.gz format. 628 path : str 629 specifies a custom path for the file (default '.') 630 """ 631 if 'path' in kwargs: 632 file_name = kwargs.get('path') + '/' + filename 633 else: 634 file_name = filename 635 636 if datatype == "json.gz": 637 from .input.json import dump_to_json 638 dump_to_json([self], file_name, description=description) 639 elif datatype == "pickle": 640 with open(file_name + '.p', 'wb') as fb: 641 pickle.dump(self, fb) 642 else: 643 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
645 def export_jackknife(self): 646 """Export jackknife samples from the Obs 647 648 Returns 649 ------- 650 numpy.ndarray 651 Returns a numpy array of length N + 1 where N is the number of samples 652 for the given ensemble and replicum. The zeroth entry of the array contains 653 the mean value of the Obs, entries 1 to N contain the N jackknife samples 654 derived from the Obs. The current implementation only works for observables 655 defined on exactly one ensemble and replicum. The derived jackknife samples 656 should agree with samples from a full jackknife analysis up to O(1/N). 657 """ 658 659 if len(self.names) != 1: 660 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 661 662 name = self.names[0] 663 full_data = self.deltas[name] + self.r_values[name] 664 n = full_data.size 665 mean = self.value 666 tmp_jacks = np.zeros(n + 1) 667 tmp_jacks[0] = mean 668 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 669 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
842class CObs: 843 """Class for a complex valued observable.""" 844 __slots__ = ['_real', '_imag', 'tag'] 845 846 def __init__(self, real, imag=0.0): 847 self._real = real 848 self._imag = imag 849 self.tag = None 850 851 @property 852 def real(self): 853 return self._real 854 855 @property 856 def imag(self): 857 return self._imag 858 859 def gamma_method(self, **kwargs): 860 """Executes the gamma_method for the real and the imaginary part.""" 861 if isinstance(self.real, Obs): 862 self.real.gamma_method(**kwargs) 863 if isinstance(self.imag, Obs): 864 self.imag.gamma_method(**kwargs) 865 866 def is_zero(self): 867 """Checks whether both real and imaginary part are zero within machine precision.""" 868 return self.real == 0.0 and self.imag == 0.0 869 870 def conjugate(self): 871 return CObs(self.real, -self.imag) 872 873 def __add__(self, other): 874 if isinstance(other, np.ndarray): 875 return other + self 876 elif hasattr(other, 'real') and hasattr(other, 'imag'): 877 return CObs(self.real + other.real, 878 self.imag + other.imag) 879 else: 880 return CObs(self.real + other, self.imag) 881 882 def __radd__(self, y): 883 return self + y 884 885 def __sub__(self, other): 886 if isinstance(other, np.ndarray): 887 return -1 * (other - self) 888 elif hasattr(other, 'real') and hasattr(other, 'imag'): 889 return CObs(self.real - other.real, self.imag - other.imag) 890 else: 891 return CObs(self.real - other, self.imag) 892 893 def __rsub__(self, other): 894 return -1 * (self - other) 895 896 def __mul__(self, other): 897 if isinstance(other, np.ndarray): 898 return other * self 899 elif hasattr(other, 'real') and hasattr(other, 'imag'): 900 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 901 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 902 [self.real, other.real, self.imag, other.imag], 903 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 904 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 905 [self.real, other.real, self.imag, other.imag], 906 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 907 elif getattr(other, 'imag', 0) != 0: 908 return CObs(self.real * other.real - self.imag * other.imag, 909 self.imag * other.real + self.real * other.imag) 910 else: 911 return CObs(self.real * other.real, self.imag * other.real) 912 else: 913 return CObs(self.real * other, self.imag * other) 914 915 def __rmul__(self, other): 916 return self * other 917 918 def __truediv__(self, other): 919 if isinstance(other, np.ndarray): 920 return 1 / (other / self) 921 elif hasattr(other, 'real') and hasattr(other, 'imag'): 922 r = other.real ** 2 + other.imag ** 2 923 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 924 else: 925 return CObs(self.real / other, self.imag / other) 926 927 def __rtruediv__(self, other): 928 r = self.real ** 2 + self.imag ** 2 929 if hasattr(other, 'real') and hasattr(other, 'imag'): 930 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 931 else: 932 return CObs(self.real * other / r, -self.imag * other / r) 933 934 def __abs__(self): 935 return np.sqrt(self.real**2 + self.imag**2) 936 937 def __pos__(self): 938 return self 939 940 def __neg__(self): 941 return -1 * self 942 943 def __eq__(self, other): 944 return self.real == other.real and self.imag == other.imag 945 946 def __str__(self): 947 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 948 949 def __repr__(self): 950 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
859 def gamma_method(self, **kwargs): 860 """Executes the gamma_method for the real and the imaginary part.""" 861 if isinstance(self.real, Obs): 862 self.real.gamma_method(**kwargs) 863 if isinstance(self.imag, Obs): 864 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1119def derived_observable(func, data, array_mode=False, **kwargs): 1120 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1121 1122 Parameters 1123 ---------- 1124 func : object 1125 arbitrary function of the form func(data, **kwargs). For the 1126 automatic differentiation to work, all numpy functions have to have 1127 the autograd wrapper (use 'import autograd.numpy as anp'). 1128 data : list 1129 list of Obs, e.g. [obs1, obs2, obs3]. 1130 num_grad : bool 1131 if True, numerical derivatives are used instead of autograd 1132 (default False). To control the numerical differentiation the 1133 kwargs of numdifftools.step_generators.MaxStepGenerator 1134 can be used. 1135 man_grad : list 1136 manually supply a list or an array which contains the jacobian 1137 of func. Use cautiously, supplying the wrong derivative will 1138 not be intercepted. 1139 1140 Notes 1141 ----- 1142 For simple mathematical operations it can be practical to use anonymous 1143 functions. For the ratio of two observables one can e.g. use 1144 1145 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1146 """ 1147 1148 data = np.asarray(data) 1149 raveled_data = data.ravel() 1150 1151 # Workaround for matrix operations containing non Obs data 1152 if not all(isinstance(x, Obs) for x in raveled_data): 1153 for i in range(len(raveled_data)): 1154 if isinstance(raveled_data[i], (int, float)): 1155 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1156 1157 allcov = {} 1158 for o in raveled_data: 1159 for name in o.cov_names: 1160 if name in allcov: 1161 if not np.allclose(allcov[name], o.covobs[name].cov): 1162 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1163 else: 1164 allcov[name] = o.covobs[name].cov 1165 1166 n_obs = len(raveled_data) 1167 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1168 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1169 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1170 1171 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1172 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1173 1174 if data.ndim == 1: 1175 values = np.array([o.value for o in data]) 1176 else: 1177 values = np.vectorize(lambda x: x.value)(data) 1178 1179 new_values = func(values, **kwargs) 1180 1181 multi = int(isinstance(new_values, np.ndarray)) 1182 1183 new_r_values = {} 1184 new_idl_d = {} 1185 for name in new_sample_names: 1186 idl = [] 1187 tmp_values = np.zeros(n_obs) 1188 for i, item in enumerate(raveled_data): 1189 tmp_values[i] = item.r_values.get(name, item.value) 1190 tmp_idl = item.idl.get(name) 1191 if tmp_idl is not None: 1192 idl.append(tmp_idl) 1193 if multi > 0: 1194 tmp_values = np.array(tmp_values).reshape(data.shape) 1195 new_r_values[name] = func(tmp_values, **kwargs) 1196 new_idl_d[name] = _merge_idx(idl) 1197 if not is_merged[name]: 1198 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1199 1200 if 'man_grad' in kwargs: 1201 deriv = np.asarray(kwargs.get('man_grad')) 1202 if new_values.shape + data.shape != deriv.shape: 1203 raise Exception('Manual derivative does not have correct shape.') 1204 elif kwargs.get('num_grad') is True: 1205 if multi > 0: 1206 raise Exception('Multi mode currently not supported for numerical derivative') 1207 options = { 1208 'base_step': 0.1, 1209 'step_ratio': 2.5} 1210 for key in options.keys(): 1211 kwarg = kwargs.get(key) 1212 if kwarg is not None: 1213 options[key] = kwarg 1214 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1215 if tmp_df.size == 1: 1216 deriv = np.array([tmp_df.real]) 1217 else: 1218 deriv = tmp_df.real 1219 else: 1220 deriv = jacobian(func)(values, **kwargs) 1221 1222 final_result = np.zeros(new_values.shape, dtype=object) 1223 1224 if array_mode is True: 1225 1226 class _Zero_grad(): 1227 def __init__(self, N): 1228 self.grad = np.zeros((N, 1)) 1229 1230 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1231 d_extracted = {} 1232 g_extracted = {} 1233 for name in new_sample_names: 1234 d_extracted[name] = [] 1235 ens_length = len(new_idl_d[name]) 1236 for i_dat, dat in enumerate(data): 1237 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1238 for name in new_cov_names: 1239 g_extracted[name] = [] 1240 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1241 for i_dat, dat in enumerate(data): 1242 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1243 1244 for i_val, new_val in np.ndenumerate(new_values): 1245 new_deltas = {} 1246 new_grad = {} 1247 if array_mode is True: 1248 for name in new_sample_names: 1249 ens_length = d_extracted[name][0].shape[-1] 1250 new_deltas[name] = np.zeros(ens_length) 1251 for i_dat, dat in enumerate(d_extracted[name]): 1252 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1253 for name in new_cov_names: 1254 new_grad[name] = 0 1255 for i_dat, dat in enumerate(g_extracted[name]): 1256 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1257 else: 1258 for j_obs, obs in np.ndenumerate(data): 1259 for name in obs.names: 1260 if name in obs.cov_names: 1261 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1262 else: 1263 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1264 1265 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1266 1267 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1268 raise Exception('The same name has been used for deltas and covobs!') 1269 new_samples = [] 1270 new_means = [] 1271 new_idl = [] 1272 new_names_obs = [] 1273 for name in new_names: 1274 if name not in new_covobs: 1275 if is_merged[name]: 1276 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1277 else: 1278 filtered_deltas = new_deltas[name] 1279 filtered_idl_d = new_idl_d[name] 1280 1281 new_samples.append(filtered_deltas) 1282 new_idl.append(filtered_idl_d) 1283 new_means.append(new_r_values[name][i_val]) 1284 new_names_obs.append(name) 1285 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1286 for name in new_covobs: 1287 final_result[i_val].names.append(name) 1288 final_result[i_val]._covobs = new_covobs 1289 final_result[i_val]._value = new_val 1290 final_result[i_val].is_merged = is_merged 1291 final_result[i_val].reweighted = reweighted 1292 1293 if multi == 0: 1294 final_result = final_result.item() 1295 1296 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1336def reweight(weight, obs, **kwargs): 1337 """Reweight a list of observables. 1338 1339 Parameters 1340 ---------- 1341 weight : Obs 1342 Reweighting factor. An Observable that has to be defined on a superset of the 1343 configurations in obs[i].idl for all i. 1344 obs : list 1345 list of Obs, e.g. [obs1, obs2, obs3]. 1346 all_configs : bool 1347 if True, the reweighted observables are normalized by the average of 1348 the reweighting factor on all configurations in weight.idl and not 1349 on the configurations in obs[i].idl. Default False. 1350 """ 1351 result = [] 1352 for i in range(len(obs)): 1353 if len(obs[i].cov_names): 1354 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1355 if not set(obs[i].names).issubset(weight.names): 1356 raise Exception('Error: Ensembles do not fit') 1357 for name in obs[i].names: 1358 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1359 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1360 new_samples = [] 1361 w_deltas = {} 1362 for name in sorted(obs[i].names): 1363 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1364 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1365 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1366 1367 if kwargs.get('all_configs'): 1368 new_weight = weight 1369 else: 1370 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1371 1372 result.append(tmp_obs / new_weight) 1373 result[-1].reweighted = True 1374 result[-1].is_merged = obs[i].is_merged 1375 1376 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl. Default False.
1379def correlate(obs_a, obs_b): 1380 """Correlate two observables. 1381 1382 Parameters 1383 ---------- 1384 obs_a : Obs 1385 First observable 1386 obs_b : Obs 1387 Second observable 1388 1389 Notes 1390 ----- 1391 Keep in mind to only correlate primary observables which have not been reweighted 1392 yet. The reweighting has to be applied after correlating the observables. 1393 Currently only works if ensembles are identical (this is not strictly necessary). 1394 """ 1395 1396 if sorted(obs_a.names) != sorted(obs_b.names): 1397 raise Exception('Ensembles do not fit') 1398 if len(obs_a.cov_names) or len(obs_b.cov_names): 1399 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1400 for name in obs_a.names: 1401 if obs_a.shape[name] != obs_b.shape[name]: 1402 raise Exception('Shapes of ensemble', name, 'do not fit') 1403 if obs_a.idl[name] != obs_b.idl[name]: 1404 raise Exception('idl of ensemble', name, 'do not fit') 1405 1406 if obs_a.reweighted is True: 1407 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1408 if obs_b.reweighted is True: 1409 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1410 1411 new_samples = [] 1412 new_idl = [] 1413 for name in sorted(obs_a.names): 1414 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1415 new_idl.append(obs_a.idl[name]) 1416 1417 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1418 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1419 o.reweighted = obs_a.reweighted or obs_b.reweighted 1420 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1423def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1424 r'''Calculates the error covariance matrix of a set of observables. 1425 1426 The gamma method has to be applied first to all observables. 1427 1428 Parameters 1429 ---------- 1430 obs : list or numpy.ndarray 1431 List or one dimensional array of Obs 1432 visualize : bool 1433 If True plots the corresponding normalized correlation matrix (default False). 1434 correlation : bool 1435 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1436 smooth : None or int 1437 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1438 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1439 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1440 small ones. 1441 1442 Notes 1443 ----- 1444 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1445 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1446 in the absence of autocorrelation. 1447 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1448 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1449 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1450 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1451 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1452 ''' 1453 1454 length = len(obs) 1455 1456 max_samples = np.max([o.N for o in obs]) 1457 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1458 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1459 1460 cov = np.zeros((length, length)) 1461 for i in range(length): 1462 for j in range(i, length): 1463 cov[i, j] = _covariance_element(obs[i], obs[j]) 1464 cov = cov + cov.T - np.diag(np.diag(cov)) 1465 1466 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1467 1468 if isinstance(smooth, int): 1469 corr = _smooth_eigenvalues(corr, smooth) 1470 1471 if visualize: 1472 plt.matshow(corr, vmin=-1, vmax=1) 1473 plt.set_cmap('RdBu') 1474 plt.colorbar() 1475 plt.draw() 1476 1477 if correlation is True: 1478 return corr 1479 1480 errors = [o.dvalue for o in obs] 1481 cov = np.diag(errors) @ corr @ np.diag(errors) 1482 1483 eigenvalues = np.linalg.eigh(cov)[0] 1484 if not np.all(eigenvalues >= 0): 1485 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1486 1487 return cov
Calculates the error covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1567def import_jackknife(jacks, name, idl=None): 1568 """Imports jackknife samples and returns an Obs 1569 1570 Parameters 1571 ---------- 1572 jacks : numpy.ndarray 1573 numpy array containing the mean value as zeroth entry and 1574 the N jackknife samples as first to Nth entry. 1575 name : str 1576 name of the ensemble the samples are defined on. 1577 """ 1578 length = len(jacks) - 1 1579 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1580 samples = jacks[1:] @ prj 1581 mean = np.mean(samples) 1582 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1583 new_obs._value = jacks[0] 1584 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1587def merge_obs(list_of_obs): 1588 """Combine all observables in list_of_obs into one new observable 1589 1590 Parameters 1591 ---------- 1592 list_of_obs : list 1593 list of the Obs object to be combined 1594 1595 Notes 1596 ----- 1597 It is not possible to combine obs which are based on the same replicum 1598 """ 1599 replist = [item for obs in list_of_obs for item in obs.names] 1600 if (len(replist) == len(set(replist))) is False: 1601 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1602 if any([len(o.cov_names) for o in list_of_obs]): 1603 raise Exception('Not possible to merge data that contains covobs!') 1604 new_dict = {} 1605 idl_dict = {} 1606 for o in list_of_obs: 1607 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1608 for key in set(o.deltas) | set(o.r_values)}) 1609 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1610 1611 names = sorted(new_dict.keys()) 1612 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1613 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1614 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1615 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1618def cov_Obs(means, cov, name, grad=None): 1619 """Create an Obs based on mean(s) and a covariance matrix 1620 1621 Parameters 1622 ---------- 1623 mean : list of floats or float 1624 N mean value(s) of the new Obs 1625 cov : list or array 1626 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1627 name : str 1628 identifier for the covariance matrix 1629 grad : list or array 1630 Gradient of the Covobs wrt. the means belonging to cov. 1631 """ 1632 1633 def covobs_to_obs(co): 1634 """Make an Obs out of a Covobs 1635 1636 Parameters 1637 ---------- 1638 co : Covobs 1639 Covobs to be embedded into the Obs 1640 """ 1641 o = Obs([], [], means=[]) 1642 o._value = co.value 1643 o.names.append(co.name) 1644 o._covobs[co.name] = co 1645 o._dvalue = np.sqrt(co.errsq()) 1646 return o 1647 1648 ol = [] 1649 if isinstance(means, (float, int)): 1650 means = [means] 1651 1652 for i in range(len(means)): 1653 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1654 if ol[0].covobs[name].N != len(means): 1655 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1656 if len(ol) == 1: 1657 return ol[0] 1658 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.