pyerrors.obs
View Source
0import warnings 1import pickle 2import numpy as np 3import autograd.numpy as anp # Thinly-wrapped numpy 4from autograd import jacobian 5import matplotlib.pyplot as plt 6from scipy.stats import skew, skewtest, kurtosis, kurtosistest 7import numdifftools as nd 8from itertools import groupby 9from .covobs import Covobs 10 11 12class Obs: 13 """Class for a general observable. 14 15 Instances of Obs are the basic objects of a pyerrors error analysis. 16 They are initialized with a list which contains arrays of samples for 17 different ensembles/replica and another list of same length which contains 18 the names of the ensembles/replica. Mathematical operations can be 19 performed on instances. The result is another instance of Obs. The error of 20 an instance can be computed with the gamma_method. Also contains additional 21 methods for output and visualization of the error calculation. 22 23 Attributes 24 ---------- 25 S_global : float 26 Standard value for S (default 2.0) 27 S_dict : dict 28 Dictionary for S values. If an entry for a given ensemble 29 exists this overwrites the standard value for that ensemble. 30 tau_exp_global : float 31 Standard value for tau_exp (default 0.0) 32 tau_exp_dict : dict 33 Dictionary for tau_exp values. If an entry for a given ensemble exists 34 this overwrites the standard value for that ensemble. 35 N_sigma_global : float 36 Standard value for N_sigma (default 1.0) 37 N_sigma_dict : dict 38 Dictionary for N_sigma values. If an entry for a given ensemble exists 39 this overwrites the standard value for that ensemble. 40 """ 41 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 42 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 43 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 44 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 45 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 46 47 S_global = 2.0 48 S_dict = {} 49 tau_exp_global = 0.0 50 tau_exp_dict = {} 51 N_sigma_global = 1.0 52 N_sigma_dict = {} 53 filter_eps = 1e-10 54 55 def __init__(self, samples, names, idl=None, **kwargs): 56 """ Initialize Obs object. 57 58 Parameters 59 ---------- 60 samples : list 61 list of numpy arrays containing the Monte Carlo samples 62 names : list 63 list of strings labeling the individual samples 64 idl : list, optional 65 list of ranges or lists on which the samples are defined 66 """ 67 68 if kwargs.get("means") is None and len(samples): 69 if len(samples) != len(names): 70 raise Exception('Length of samples and names incompatible.') 71 if idl is not None: 72 if len(idl) != len(names): 73 raise Exception('Length of idl incompatible with samples and names.') 74 name_length = len(names) 75 if name_length > 1: 76 if name_length != len(set(names)): 77 raise Exception('names are not unique.') 78 if not all(isinstance(x, str) for x in names): 79 raise TypeError('All names have to be strings.') 80 else: 81 if not isinstance(names[0], str): 82 raise TypeError('All names have to be strings.') 83 if min(len(x) for x in samples) <= 4: 84 raise Exception('Samples have to have at least 5 entries.') 85 86 self.names = sorted(names) 87 self.shape = {} 88 self.r_values = {} 89 self.deltas = {} 90 self._covobs = {} 91 92 self._value = 0 93 self.N = 0 94 self.is_merged = {} 95 self.idl = {} 96 if idl is not None: 97 for name, idx in sorted(zip(names, idl)): 98 if isinstance(idx, range): 99 self.idl[name] = idx 100 elif isinstance(idx, (list, np.ndarray)): 101 dc = np.unique(np.diff(idx)) 102 if np.any(dc < 0): 103 raise Exception("Unsorted idx for idl[%s]" % (name)) 104 if len(dc) == 1: 105 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 106 else: 107 self.idl[name] = list(idx) 108 else: 109 raise Exception('incompatible type for idl[%s].' % (name)) 110 else: 111 for name, sample in sorted(zip(names, samples)): 112 self.idl[name] = range(1, len(sample) + 1) 113 114 if kwargs.get("means") is not None: 115 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 116 self.shape[name] = len(self.idl[name]) 117 self.N += self.shape[name] 118 self.r_values[name] = mean 119 self.deltas[name] = sample 120 else: 121 for name, sample in sorted(zip(names, samples)): 122 self.shape[name] = len(self.idl[name]) 123 self.N += self.shape[name] 124 if len(sample) != self.shape[name]: 125 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 126 self.r_values[name] = np.mean(sample) 127 self.deltas[name] = sample - self.r_values[name] 128 self._value += self.shape[name] * self.r_values[name] 129 self._value /= self.N 130 131 self._dvalue = 0.0 132 self.ddvalue = 0.0 133 self.reweighted = False 134 135 self.tag = None 136 137 @property 138 def value(self): 139 return self._value 140 141 @property 142 def dvalue(self): 143 return self._dvalue 144 145 @property 146 def e_names(self): 147 return sorted(set([o.split('|')[0] for o in self.names])) 148 149 @property 150 def cov_names(self): 151 return sorted(set([o for o in self.covobs.keys()])) 152 153 @property 154 def mc_names(self): 155 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 156 157 @property 158 def e_content(self): 159 res = {} 160 for e, e_name in enumerate(self.e_names): 161 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 162 if e_name in self.names: 163 res[e_name].append(e_name) 164 return res 165 166 @property 167 def covobs(self): 168 return self._covobs 169 170 def gamma_method(self, **kwargs): 171 """Estimate the error and related properties of the Obs. 172 173 Parameters 174 ---------- 175 S : float 176 specifies a custom value for the parameter S (default 2.0). 177 If set to 0 it is assumed that the data exhibits no 178 autocorrelation. In this case the error estimates coincides 179 with the sample standard error. 180 tau_exp : float 181 positive value triggers the critical slowing down analysis 182 (default 0.0). 183 N_sigma : float 184 number of standard deviations from zero until the tail is 185 attached to the autocorrelation function (default 1). 186 fft : bool 187 determines whether the fft algorithm is used for the computation 188 of the autocorrelation function (default True) 189 """ 190 191 e_content = self.e_content 192 self.e_dvalue = {} 193 self.e_ddvalue = {} 194 self.e_tauint = {} 195 self.e_dtauint = {} 196 self.e_windowsize = {} 197 self.e_n_tauint = {} 198 self.e_n_dtauint = {} 199 e_gamma = {} 200 self.e_rho = {} 201 self.e_drho = {} 202 self._dvalue = 0 203 self.ddvalue = 0 204 205 self.S = {} 206 self.tau_exp = {} 207 self.N_sigma = {} 208 209 if kwargs.get('fft') is False: 210 fft = False 211 else: 212 fft = True 213 214 def _parse_kwarg(kwarg_name): 215 if kwarg_name in kwargs: 216 tmp = kwargs.get(kwarg_name) 217 if isinstance(tmp, (int, float)): 218 if tmp < 0: 219 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 220 for e, e_name in enumerate(self.e_names): 221 getattr(self, kwarg_name)[e_name] = tmp 222 else: 223 raise TypeError(kwarg_name + ' is not in proper format.') 224 else: 225 for e, e_name in enumerate(self.e_names): 226 if e_name in getattr(Obs, kwarg_name + '_dict'): 227 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 228 else: 229 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 230 231 _parse_kwarg('S') 232 _parse_kwarg('tau_exp') 233 _parse_kwarg('N_sigma') 234 235 for e, e_name in enumerate(self.mc_names): 236 r_length = [] 237 for r_name in e_content[e_name]: 238 if isinstance(self.idl[r_name], range): 239 r_length.append(len(self.idl[r_name])) 240 else: 241 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 242 243 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 244 w_max = max(r_length) // 2 245 e_gamma[e_name] = np.zeros(w_max) 246 self.e_rho[e_name] = np.zeros(w_max) 247 self.e_drho[e_name] = np.zeros(w_max) 248 249 for r_name in e_content[e_name]: 250 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 251 252 gamma_div = np.zeros(w_max) 253 for r_name in e_content[e_name]: 254 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 255 gamma_div[gamma_div < 1] = 1.0 256 e_gamma[e_name] /= gamma_div[:w_max] 257 258 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 259 self.e_tauint[e_name] = 0.5 260 self.e_dtauint[e_name] = 0.0 261 self.e_dvalue[e_name] = 0.0 262 self.e_ddvalue[e_name] = 0.0 263 self.e_windowsize[e_name] = 0 264 continue 265 266 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 267 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 268 # Make sure no entry of tauint is smaller than 0.5 269 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 270 # hep-lat/0306017 eq. (42) 271 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 272 self.e_n_dtauint[e_name][0] = 0.0 273 274 def _compute_drho(i): 275 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 276 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 277 278 _compute_drho(1) 279 if self.tau_exp[e_name] > 0: 280 texp = self.tau_exp[e_name] 281 # Critical slowing down analysis 282 if w_max // 2 <= 1: 283 raise Exception("Need at least 8 samples for tau_exp error analysis") 284 for n in range(1, w_max // 2): 285 _compute_drho(n + 1) 286 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 287 # Bias correction hep-lat/0306017 eq. (49) included 288 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 289 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 290 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 291 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 292 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 293 self.e_windowsize[e_name] = n 294 break 295 else: 296 if self.S[e_name] == 0.0: 297 self.e_tauint[e_name] = 0.5 298 self.e_dtauint[e_name] = 0.0 299 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 300 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 301 self.e_windowsize[e_name] = 0 302 else: 303 # Standard automatic windowing procedure 304 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 305 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 306 for n in range(1, w_max): 307 if n < w_max // 2 - 2: 308 _compute_drho(n + 1) 309 if g_w[n - 1] < 0 or n >= w_max - 1: 310 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 311 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 312 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 313 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 314 self.e_windowsize[e_name] = n 315 break 316 317 self._dvalue += self.e_dvalue[e_name] ** 2 318 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 319 320 for e_name in self.cov_names: 321 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 322 self.e_ddvalue[e_name] = 0 323 self._dvalue += self.e_dvalue[e_name]**2 324 325 self._dvalue = np.sqrt(self._dvalue) 326 if self._dvalue == 0.0: 327 self.ddvalue = 0.0 328 else: 329 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 330 return 331 332 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 333 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 334 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 335 336 Parameters 337 ---------- 338 deltas : list 339 List of fluctuations 340 idx : list 341 List or range of configurations on which the deltas are defined. 342 shape : int 343 Number of configurations in idx. 344 w_max : int 345 Upper bound for the summation window. 346 fft : bool 347 determines whether the fft algorithm is used for the computation 348 of the autocorrelation function. 349 """ 350 gamma = np.zeros(w_max) 351 deltas = _expand_deltas(deltas, idx, shape) 352 new_shape = len(deltas) 353 if fft: 354 max_gamma = min(new_shape, w_max) 355 # The padding for the fft has to be even 356 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 357 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 358 else: 359 for n in range(w_max): 360 if new_shape - n >= 0: 361 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 362 363 return gamma 364 365 def details(self, ens_content=True): 366 """Output detailed properties of the Obs. 367 368 Parameters 369 ---------- 370 ens_content : bool 371 print details about the ensembles and replica if true. 372 """ 373 if self.tag is not None: 374 print("Description:", self.tag) 375 if not hasattr(self, 'e_dvalue'): 376 print('Result\t %3.8e' % (self.value)) 377 else: 378 if self.value == 0.0: 379 percentage = np.nan 380 else: 381 percentage = np.abs(self._dvalue / self.value) * 100 382 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 383 if len(self.e_names) > 1: 384 print(' Ensemble errors:') 385 for e_name in self.mc_names: 386 if len(self.e_names) > 1: 387 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 388 if self.tau_exp[e_name] > 0: 389 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 390 else: 391 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 392 for e_name in self.cov_names: 393 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 394 if ens_content is True: 395 if len(self.e_names) == 1: 396 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 397 else: 398 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 399 my_string_list = [] 400 for key, value in sorted(self.e_content.items()): 401 if key not in self.covobs: 402 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 403 if len(value) == 1: 404 my_string += f': {self.shape[value[0]]} configurations' 405 if isinstance(self.idl[value[0]], range): 406 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 407 else: 408 my_string += ' (irregular range)' 409 else: 410 sublist = [] 411 for v in value: 412 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 413 my_substring += f': {self.shape[v]} configurations' 414 if isinstance(self.idl[v], range): 415 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 416 else: 417 my_substring += ' (irregular range)' 418 sublist.append(my_substring) 419 420 my_string += '\n' + '\n'.join(sublist) 421 else: 422 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 423 my_string_list.append(my_string) 424 print('\n'.join(my_string_list)) 425 426 def is_zero_within_error(self, sigma=1): 427 """Checks whether the observable is zero within 'sigma' standard errors. 428 429 Parameters 430 ---------- 431 sigma : int 432 Number of standard errors used for the check. 433 434 Works only properly when the gamma method was run. 435 """ 436 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 437 438 def is_zero(self, atol=1e-10): 439 """Checks whether the observable is zero within a given tolerance. 440 441 Parameters 442 ---------- 443 atol : float 444 Absolute tolerance (for details see numpy documentation). 445 """ 446 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 447 448 def plot_tauint(self, save=None): 449 """Plot integrated autocorrelation time for each ensemble. 450 451 Parameters 452 ---------- 453 save : str 454 saves the figure to a file named 'save' if. 455 """ 456 if not hasattr(self, 'e_dvalue'): 457 raise Exception('Run the gamma method first.') 458 459 for e, e_name in enumerate(self.mc_names): 460 fig = plt.figure() 461 plt.xlabel(r'$W$') 462 plt.ylabel(r'$\tau_\mathrm{int}$') 463 length = int(len(self.e_n_tauint[e_name])) 464 if self.tau_exp[e_name] > 0: 465 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 466 x_help = np.arange(2 * self.tau_exp[e_name]) 467 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 468 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 469 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 470 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 471 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 472 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 473 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 474 else: 475 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 476 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 477 478 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 479 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 480 plt.legend() 481 plt.xlim(-0.5, xmax) 482 ylim = plt.ylim() 483 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 484 plt.draw() 485 if save: 486 fig.savefig(save + "_" + str(e)) 487 488 def plot_rho(self, save=None): 489 """Plot normalized autocorrelation function time for each ensemble. 490 491 Parameters 492 ---------- 493 save : str 494 saves the figure to a file named 'save' if. 495 """ 496 if not hasattr(self, 'e_dvalue'): 497 raise Exception('Run the gamma method first.') 498 for e, e_name in enumerate(self.mc_names): 499 fig = plt.figure() 500 plt.xlabel('W') 501 plt.ylabel('rho') 502 length = int(len(self.e_drho[e_name])) 503 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 504 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 505 if self.tau_exp[e_name] > 0: 506 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 507 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 508 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 509 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 510 else: 511 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 512 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 513 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 514 plt.xlim(-0.5, xmax) 515 plt.draw() 516 if save: 517 fig.savefig(save + "_" + str(e)) 518 519 def plot_rep_dist(self): 520 """Plot replica distribution for each ensemble with more than one replicum.""" 521 if not hasattr(self, 'e_dvalue'): 522 raise Exception('Run the gamma method first.') 523 for e, e_name in enumerate(self.mc_names): 524 if len(self.e_content[e_name]) == 1: 525 print('No replica distribution for a single replicum (', e_name, ')') 526 continue 527 r_length = [] 528 sub_r_mean = 0 529 for r, r_name in enumerate(self.e_content[e_name]): 530 r_length.append(len(self.deltas[r_name])) 531 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 532 e_N = np.sum(r_length) 533 sub_r_mean /= e_N 534 arr = np.zeros(len(self.e_content[e_name])) 535 for r, r_name in enumerate(self.e_content[e_name]): 536 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 537 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 538 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 539 plt.draw() 540 541 def plot_history(self, expand=True): 542 """Plot derived Monte Carlo history for each ensemble 543 544 Parameters 545 ---------- 546 expand : bool 547 show expanded history for irregular Monte Carlo chains (default: True). 548 """ 549 for e, e_name in enumerate(self.mc_names): 550 plt.figure() 551 r_length = [] 552 tmp = [] 553 tmp_expanded = [] 554 for r, r_name in enumerate(self.e_content[e_name]): 555 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 556 if expand: 557 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 558 r_length.append(len(tmp_expanded[-1])) 559 else: 560 r_length.append(len(tmp[-1])) 561 e_N = np.sum(r_length) 562 x = np.arange(e_N) 563 y_test = np.concatenate(tmp, axis=0) 564 if expand: 565 y = np.concatenate(tmp_expanded, axis=0) 566 else: 567 y = y_test 568 plt.errorbar(x, y, fmt='.', markersize=3) 569 plt.xlim(-0.5, e_N - 0.5) 570 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 571 plt.draw() 572 573 def plot_piechart(self): 574 """Plot piechart which shows the fractional contribution of each 575 ensemble to the error and returns a dictionary containing the fractions.""" 576 if not hasattr(self, 'e_dvalue'): 577 raise Exception('Run the gamma method first.') 578 if np.isclose(0.0, self._dvalue, atol=1e-15): 579 raise Exception('Error is 0.0') 580 labels = self.e_names 581 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 582 fig1, ax1 = plt.subplots() 583 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 584 ax1.axis('equal') 585 plt.draw() 586 587 return dict(zip(self.e_names, sizes)) 588 589 def dump(self, filename, datatype="json.gz", description="", **kwargs): 590 """Dump the Obs to a file 'name' of chosen format. 591 592 Parameters 593 ---------- 594 filename : str 595 name of the file to be saved. 596 datatype : str 597 Format of the exported file. Supported formats include 598 "json.gz" and "pickle" 599 description : str 600 Description for output file, only relevant for json.gz format. 601 path : str 602 specifies a custom path for the file (default '.') 603 """ 604 if 'path' in kwargs: 605 file_name = kwargs.get('path') + '/' + filename 606 else: 607 file_name = filename 608 609 if datatype == "json.gz": 610 from .input.json import dump_to_json 611 dump_to_json([self], file_name, description=description) 612 elif datatype == "pickle": 613 with open(file_name + '.p', 'wb') as fb: 614 pickle.dump(self, fb) 615 else: 616 raise Exception("Unknown datatype " + str(datatype)) 617 618 def export_jackknife(self): 619 """Export jackknife samples from the Obs 620 621 Returns 622 ------- 623 numpy.ndarray 624 Returns a numpy array of length N + 1 where N is the number of samples 625 for the given ensemble and replicum. The zeroth entry of the array contains 626 the mean value of the Obs, entries 1 to N contain the N jackknife samples 627 derived from the Obs. The current implementation only works for observables 628 defined on exactly one ensemble and replicum. The derived jackknife samples 629 should agree with samples from a full jackknife analysis up to O(1/N). 630 """ 631 632 if len(self.names) != 1: 633 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 634 635 name = self.names[0] 636 full_data = self.deltas[name] + self.r_values[name] 637 n = full_data.size 638 mean = self.value 639 tmp_jacks = np.zeros(n + 1) 640 tmp_jacks[0] = mean 641 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 642 return tmp_jacks 643 644 def __float__(self): 645 return float(self.value) 646 647 def __repr__(self): 648 return 'Obs[' + str(self) + ']' 649 650 def __str__(self): 651 if self._dvalue == 0.0: 652 return str(self.value) 653 fexp = np.floor(np.log10(self._dvalue)) 654 if fexp < 0.0: 655 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 656 elif fexp == 0.0: 657 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 658 else: 659 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 660 661 # Overload comparisons 662 def __lt__(self, other): 663 return self.value < other 664 665 def __le__(self, other): 666 return self.value <= other 667 668 def __gt__(self, other): 669 return self.value > other 670 671 def __ge__(self, other): 672 return self.value >= other 673 674 def __eq__(self, other): 675 return (self - other).is_zero() 676 677 def __ne__(self, other): 678 return not (self - other).is_zero() 679 680 # Overload math operations 681 def __add__(self, y): 682 if isinstance(y, Obs): 683 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 684 else: 685 if isinstance(y, np.ndarray): 686 return np.array([self + o for o in y]) 687 elif y.__class__.__name__ in ['Corr', 'CObs']: 688 return NotImplemented 689 else: 690 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 691 692 def __radd__(self, y): 693 return self + y 694 695 def __mul__(self, y): 696 if isinstance(y, Obs): 697 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 698 else: 699 if isinstance(y, np.ndarray): 700 return np.array([self * o for o in y]) 701 elif isinstance(y, complex): 702 return CObs(self * y.real, self * y.imag) 703 elif y.__class__.__name__ in ['Corr', 'CObs']: 704 return NotImplemented 705 else: 706 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 707 708 def __rmul__(self, y): 709 return self * y 710 711 def __sub__(self, y): 712 if isinstance(y, Obs): 713 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 714 else: 715 if isinstance(y, np.ndarray): 716 return np.array([self - o for o in y]) 717 elif y.__class__.__name__ in ['Corr', 'CObs']: 718 return NotImplemented 719 else: 720 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 721 722 def __rsub__(self, y): 723 return -1 * (self - y) 724 725 def __pos__(self): 726 return self 727 728 def __neg__(self): 729 return -1 * self 730 731 def __truediv__(self, y): 732 if isinstance(y, Obs): 733 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 734 else: 735 if isinstance(y, np.ndarray): 736 return np.array([self / o for o in y]) 737 elif y.__class__.__name__ in ['Corr', 'CObs']: 738 return NotImplemented 739 else: 740 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 741 742 def __rtruediv__(self, y): 743 if isinstance(y, Obs): 744 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 745 else: 746 if isinstance(y, np.ndarray): 747 return np.array([o / self for o in y]) 748 elif y.__class__.__name__ in ['Corr', 'CObs']: 749 return NotImplemented 750 else: 751 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 752 753 def __pow__(self, y): 754 if isinstance(y, Obs): 755 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 756 else: 757 return derived_observable(lambda x: x[0] ** y, [self]) 758 759 def __rpow__(self, y): 760 if isinstance(y, Obs): 761 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 762 else: 763 return derived_observable(lambda x: y ** x[0], [self]) 764 765 def __abs__(self): 766 return derived_observable(lambda x: anp.abs(x[0]), [self]) 767 768 # Overload numpy functions 769 def sqrt(self): 770 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 771 772 def log(self): 773 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 774 775 def exp(self): 776 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 777 778 def sin(self): 779 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 780 781 def cos(self): 782 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 783 784 def tan(self): 785 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 786 787 def arcsin(self): 788 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 789 790 def arccos(self): 791 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 792 793 def arctan(self): 794 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 795 796 def sinh(self): 797 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 798 799 def cosh(self): 800 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 801 802 def tanh(self): 803 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 804 805 def arcsinh(self): 806 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 807 808 def arccosh(self): 809 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 810 811 def arctanh(self): 812 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 813 814 815class CObs: 816 """Class for a complex valued observable.""" 817 __slots__ = ['_real', '_imag', 'tag'] 818 819 def __init__(self, real, imag=0.0): 820 self._real = real 821 self._imag = imag 822 self.tag = None 823 824 @property 825 def real(self): 826 return self._real 827 828 @property 829 def imag(self): 830 return self._imag 831 832 def gamma_method(self, **kwargs): 833 """Executes the gamma_method for the real and the imaginary part.""" 834 if isinstance(self.real, Obs): 835 self.real.gamma_method(**kwargs) 836 if isinstance(self.imag, Obs): 837 self.imag.gamma_method(**kwargs) 838 839 def is_zero(self): 840 """Checks whether both real and imaginary part are zero within machine precision.""" 841 return self.real == 0.0 and self.imag == 0.0 842 843 def conjugate(self): 844 return CObs(self.real, -self.imag) 845 846 def __add__(self, other): 847 if isinstance(other, np.ndarray): 848 return other + self 849 elif hasattr(other, 'real') and hasattr(other, 'imag'): 850 return CObs(self.real + other.real, 851 self.imag + other.imag) 852 else: 853 return CObs(self.real + other, self.imag) 854 855 def __radd__(self, y): 856 return self + y 857 858 def __sub__(self, other): 859 if isinstance(other, np.ndarray): 860 return -1 * (other - self) 861 elif hasattr(other, 'real') and hasattr(other, 'imag'): 862 return CObs(self.real - other.real, self.imag - other.imag) 863 else: 864 return CObs(self.real - other, self.imag) 865 866 def __rsub__(self, other): 867 return -1 * (self - other) 868 869 def __mul__(self, other): 870 if isinstance(other, np.ndarray): 871 return other * self 872 elif hasattr(other, 'real') and hasattr(other, 'imag'): 873 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 874 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 875 [self.real, other.real, self.imag, other.imag], 876 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 877 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 878 [self.real, other.real, self.imag, other.imag], 879 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 880 elif getattr(other, 'imag', 0) != 0: 881 return CObs(self.real * other.real - self.imag * other.imag, 882 self.imag * other.real + self.real * other.imag) 883 else: 884 return CObs(self.real * other.real, self.imag * other.real) 885 else: 886 return CObs(self.real * other, self.imag * other) 887 888 def __rmul__(self, other): 889 return self * other 890 891 def __truediv__(self, other): 892 if isinstance(other, np.ndarray): 893 return 1 / (other / self) 894 elif hasattr(other, 'real') and hasattr(other, 'imag'): 895 r = other.real ** 2 + other.imag ** 2 896 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 897 else: 898 return CObs(self.real / other, self.imag / other) 899 900 def __rtruediv__(self, other): 901 r = self.real ** 2 + self.imag ** 2 902 if hasattr(other, 'real') and hasattr(other, 'imag'): 903 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 904 else: 905 return CObs(self.real * other / r, -self.imag * other / r) 906 907 def __abs__(self): 908 return np.sqrt(self.real**2 + self.imag**2) 909 910 def __pos__(self): 911 return self 912 913 def __neg__(self): 914 return -1 * self 915 916 def __eq__(self, other): 917 return self.real == other.real and self.imag == other.imag 918 919 def __str__(self): 920 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 921 922 def __repr__(self): 923 return 'CObs[' + str(self) + ']' 924 925 926def _expand_deltas(deltas, idx, shape): 927 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 928 If idx is of type range, the deltas are not changed 929 930 Parameters 931 ---------- 932 deltas : list 933 List of fluctuations 934 idx : list 935 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 936 shape : int 937 Number of configs in idx. 938 """ 939 if isinstance(idx, range): 940 return deltas 941 else: 942 ret = np.zeros(idx[-1] - idx[0] + 1) 943 for i in range(shape): 944 ret[idx[i] - idx[0]] = deltas[i] 945 return ret 946 947 948def _merge_idx(idl): 949 """Returns the union of all lists in idl as sorted list 950 951 Parameters 952 ---------- 953 idl : list 954 List of lists or ranges. 955 """ 956 957 # Use groupby to efficiently check whether all elements of idl are identical 958 try: 959 g = groupby(idl) 960 if next(g, True) and not next(g, False): 961 return idl[0] 962 except Exception: 963 pass 964 965 if np.all([type(idx) is range for idx in idl]): 966 if len(set([idx[0] for idx in idl])) == 1: 967 idstart = min([idx.start for idx in idl]) 968 idstop = max([idx.stop for idx in idl]) 969 idstep = min([idx.step for idx in idl]) 970 return range(idstart, idstop, idstep) 971 972 return sorted(set().union(*idl)) 973 974 975def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 976 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 977 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 978 common divisor of the step sizes is used as new step size. 979 980 Parameters 981 ---------- 982 deltas : list 983 List of fluctuations 984 idx : list 985 List or range of configs on which the deltas are defined. 986 Has to be a subset of new_idx and has to be sorted in ascending order. 987 shape : list 988 Number of configs in idx. 989 new_idx : list 990 List of configs that defines the new range, has to be sorted in ascending order. 991 """ 992 993 if type(idx) is range and type(new_idx) is range: 994 if idx == new_idx: 995 return deltas 996 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 997 for i in range(shape): 998 ret[idx[i] - new_idx[0]] = deltas[i] 999 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1000 1001 1002def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1003 """Filter out all configurations with vanishing fluctuation such that they do not 1004 contribute to the error estimate anymore. Returns the new deltas and 1005 idx according to the filtering. 1006 A fluctuation is considered to be vanishing, if it is smaller than eps times 1007 the mean of the absolute values of all deltas in one list. 1008 1009 Parameters 1010 ---------- 1011 deltas : list 1012 List of fluctuations 1013 idx : list 1014 List or ranges of configs on which the deltas are defined. 1015 eps : float 1016 Prefactor that enters the filter criterion. 1017 """ 1018 new_deltas = [] 1019 new_idx = [] 1020 maxd = np.mean(np.fabs(deltas)) 1021 for i in range(len(deltas)): 1022 if abs(deltas[i]) > eps * maxd: 1023 new_deltas.append(deltas[i]) 1024 new_idx.append(idx[i]) 1025 if new_idx: 1026 return np.array(new_deltas), new_idx 1027 else: 1028 return deltas, idx 1029 1030 1031def derived_observable(func, data, array_mode=False, **kwargs): 1032 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1033 1034 Parameters 1035 ---------- 1036 func : object 1037 arbitrary function of the form func(data, **kwargs). For the 1038 automatic differentiation to work, all numpy functions have to have 1039 the autograd wrapper (use 'import autograd.numpy as anp'). 1040 data : list 1041 list of Obs, e.g. [obs1, obs2, obs3]. 1042 num_grad : bool 1043 if True, numerical derivatives are used instead of autograd 1044 (default False). To control the numerical differentiation the 1045 kwargs of numdifftools.step_generators.MaxStepGenerator 1046 can be used. 1047 man_grad : list 1048 manually supply a list or an array which contains the jacobian 1049 of func. Use cautiously, supplying the wrong derivative will 1050 not be intercepted. 1051 1052 Notes 1053 ----- 1054 For simple mathematical operations it can be practical to use anonymous 1055 functions. For the ratio of two observables one can e.g. use 1056 1057 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1058 """ 1059 1060 data = np.asarray(data) 1061 raveled_data = data.ravel() 1062 1063 # Workaround for matrix operations containing non Obs data 1064 if not all(isinstance(x, Obs) for x in raveled_data): 1065 for i in range(len(raveled_data)): 1066 if isinstance(raveled_data[i], (int, float)): 1067 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1068 1069 allcov = {} 1070 for o in raveled_data: 1071 for name in o.cov_names: 1072 if name in allcov: 1073 if not np.allclose(allcov[name], o.covobs[name].cov): 1074 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1075 else: 1076 allcov[name] = o.covobs[name].cov 1077 1078 n_obs = len(raveled_data) 1079 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1080 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1081 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1082 1083 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1084 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1085 1086 if data.ndim == 1: 1087 values = np.array([o.value for o in data]) 1088 else: 1089 values = np.vectorize(lambda x: x.value)(data) 1090 1091 new_values = func(values, **kwargs) 1092 1093 multi = int(isinstance(new_values, np.ndarray)) 1094 1095 new_r_values = {} 1096 new_idl_d = {} 1097 for name in new_sample_names: 1098 idl = [] 1099 tmp_values = np.zeros(n_obs) 1100 for i, item in enumerate(raveled_data): 1101 tmp_values[i] = item.r_values.get(name, item.value) 1102 tmp_idl = item.idl.get(name) 1103 if tmp_idl is not None: 1104 idl.append(tmp_idl) 1105 if multi > 0: 1106 tmp_values = np.array(tmp_values).reshape(data.shape) 1107 new_r_values[name] = func(tmp_values, **kwargs) 1108 new_idl_d[name] = _merge_idx(idl) 1109 if not is_merged[name]: 1110 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1111 1112 if 'man_grad' in kwargs: 1113 deriv = np.asarray(kwargs.get('man_grad')) 1114 if new_values.shape + data.shape != deriv.shape: 1115 raise Exception('Manual derivative does not have correct shape.') 1116 elif kwargs.get('num_grad') is True: 1117 if multi > 0: 1118 raise Exception('Multi mode currently not supported for numerical derivative') 1119 options = { 1120 'base_step': 0.1, 1121 'step_ratio': 2.5} 1122 for key in options.keys(): 1123 kwarg = kwargs.get(key) 1124 if kwarg is not None: 1125 options[key] = kwarg 1126 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1127 if tmp_df.size == 1: 1128 deriv = np.array([tmp_df.real]) 1129 else: 1130 deriv = tmp_df.real 1131 else: 1132 deriv = jacobian(func)(values, **kwargs) 1133 1134 final_result = np.zeros(new_values.shape, dtype=object) 1135 1136 if array_mode is True: 1137 1138 class _Zero_grad(): 1139 def __init__(self, N): 1140 self.grad = np.zeros((N, 1)) 1141 1142 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1143 d_extracted = {} 1144 g_extracted = {} 1145 for name in new_sample_names: 1146 d_extracted[name] = [] 1147 ens_length = len(new_idl_d[name]) 1148 for i_dat, dat in enumerate(data): 1149 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1150 for name in new_cov_names: 1151 g_extracted[name] = [] 1152 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1153 for i_dat, dat in enumerate(data): 1154 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1155 1156 for i_val, new_val in np.ndenumerate(new_values): 1157 new_deltas = {} 1158 new_grad = {} 1159 if array_mode is True: 1160 for name in new_sample_names: 1161 ens_length = d_extracted[name][0].shape[-1] 1162 new_deltas[name] = np.zeros(ens_length) 1163 for i_dat, dat in enumerate(d_extracted[name]): 1164 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1165 for name in new_cov_names: 1166 new_grad[name] = 0 1167 for i_dat, dat in enumerate(g_extracted[name]): 1168 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1169 else: 1170 for j_obs, obs in np.ndenumerate(data): 1171 for name in obs.names: 1172 if name in obs.cov_names: 1173 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1174 else: 1175 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1176 1177 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1178 1179 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1180 raise Exception('The same name has been used for deltas and covobs!') 1181 new_samples = [] 1182 new_means = [] 1183 new_idl = [] 1184 new_names_obs = [] 1185 for name in new_names: 1186 if name not in new_covobs: 1187 if is_merged[name]: 1188 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1189 else: 1190 filtered_deltas = new_deltas[name] 1191 filtered_idl_d = new_idl_d[name] 1192 1193 new_samples.append(filtered_deltas) 1194 new_idl.append(filtered_idl_d) 1195 new_means.append(new_r_values[name][i_val]) 1196 new_names_obs.append(name) 1197 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1198 for name in new_covobs: 1199 final_result[i_val].names.append(name) 1200 final_result[i_val]._covobs = new_covobs 1201 final_result[i_val]._value = new_val 1202 final_result[i_val].is_merged = is_merged 1203 final_result[i_val].reweighted = reweighted 1204 1205 if multi == 0: 1206 final_result = final_result.item() 1207 1208 return final_result 1209 1210 1211def _reduce_deltas(deltas, idx_old, idx_new): 1212 """Extract deltas defined on idx_old on all configs of idx_new. 1213 1214 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1215 are ordered in an ascending order. 1216 1217 Parameters 1218 ---------- 1219 deltas : list 1220 List of fluctuations 1221 idx_old : list 1222 List or range of configs on which the deltas are defined 1223 idx_new : list 1224 List of configs for which we want to extract the deltas. 1225 Has to be a subset of idx_old. 1226 """ 1227 if not len(deltas) == len(idx_old): 1228 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1229 if type(idx_old) is range and type(idx_new) is range: 1230 if idx_old == idx_new: 1231 return deltas 1232 shape = len(idx_new) 1233 ret = np.zeros(shape) 1234 oldpos = 0 1235 for i in range(shape): 1236 pos = -1 1237 for j in range(oldpos, len(idx_old)): 1238 if idx_old[j] == idx_new[i]: 1239 pos = j 1240 break 1241 if pos < 0: 1242 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1243 ret[i] = deltas[pos] 1244 oldpos = pos 1245 return np.array(ret) 1246 1247 1248def reweight(weight, obs, **kwargs): 1249 """Reweight a list of observables. 1250 1251 Parameters 1252 ---------- 1253 weight : Obs 1254 Reweighting factor. An Observable that has to be defined on a superset of the 1255 configurations in obs[i].idl for all i. 1256 obs : list 1257 list of Obs, e.g. [obs1, obs2, obs3]. 1258 all_configs : bool 1259 if True, the reweighted observables are normalized by the average of 1260 the reweighting factor on all configurations in weight.idl and not 1261 on the configurations in obs[i].idl. 1262 """ 1263 result = [] 1264 for i in range(len(obs)): 1265 if len(obs[i].cov_names): 1266 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1267 if not set(obs[i].names).issubset(weight.names): 1268 raise Exception('Error: Ensembles do not fit') 1269 for name in obs[i].names: 1270 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1271 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1272 new_samples = [] 1273 w_deltas = {} 1274 for name in sorted(obs[i].names): 1275 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1276 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1277 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1278 1279 if kwargs.get('all_configs'): 1280 new_weight = weight 1281 else: 1282 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1283 1284 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1285 result[-1].reweighted = True 1286 result[-1].is_merged = obs[i].is_merged 1287 1288 return result 1289 1290 1291def correlate(obs_a, obs_b): 1292 """Correlate two observables. 1293 1294 Parameters 1295 ---------- 1296 obs_a : Obs 1297 First observable 1298 obs_b : Obs 1299 Second observable 1300 1301 Notes 1302 ----- 1303 Keep in mind to only correlate primary observables which have not been reweighted 1304 yet. The reweighting has to be applied after correlating the observables. 1305 Currently only works if ensembles are identical (this is not strictly necessary). 1306 """ 1307 1308 if sorted(obs_a.names) != sorted(obs_b.names): 1309 raise Exception('Ensembles do not fit') 1310 if len(obs_a.cov_names) or len(obs_b.cov_names): 1311 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1312 for name in obs_a.names: 1313 if obs_a.shape[name] != obs_b.shape[name]: 1314 raise Exception('Shapes of ensemble', name, 'do not fit') 1315 if obs_a.idl[name] != obs_b.idl[name]: 1316 raise Exception('idl of ensemble', name, 'do not fit') 1317 1318 if obs_a.reweighted is True: 1319 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1320 if obs_b.reweighted is True: 1321 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1322 1323 new_samples = [] 1324 new_idl = [] 1325 for name in sorted(obs_a.names): 1326 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1327 new_idl.append(obs_a.idl[name]) 1328 1329 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1330 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1331 o.reweighted = obs_a.reweighted or obs_b.reweighted 1332 return o 1333 1334 1335def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1336 r'''Calculates the covariance matrix of a set of observables. 1337 1338 The gamma method has to be applied first to all observables. 1339 1340 Parameters 1341 ---------- 1342 obs : list or numpy.ndarray 1343 List or one dimensional array of Obs 1344 visualize : bool 1345 If True plots the corresponding normalized correlation matrix (default False). 1346 correlation : bool 1347 If True the correlation instead of the covariance is returned (default False). 1348 smooth : None or int 1349 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1350 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1351 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1352 small ones. 1353 1354 Notes 1355 ----- 1356 The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1357 $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1358 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1359 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1360 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1361 ''' 1362 1363 length = len(obs) 1364 1365 max_samples = np.max([o.N for o in obs]) 1366 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1367 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1368 1369 cov = np.zeros((length, length)) 1370 for i in range(length): 1371 for j in range(i, length): 1372 cov[i, j] = _covariance_element(obs[i], obs[j]) 1373 cov = cov + cov.T - np.diag(np.diag(cov)) 1374 1375 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1376 1377 if isinstance(smooth, int): 1378 corr = _smooth_eigenvalues(corr, smooth) 1379 1380 errors = [o.dvalue for o in obs] 1381 cov = np.diag(errors) @ corr @ np.diag(errors) 1382 1383 eigenvalues = np.linalg.eigh(cov)[0] 1384 if not np.all(eigenvalues >= 0): 1385 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1386 1387 if visualize: 1388 plt.matshow(corr, vmin=-1, vmax=1) 1389 plt.set_cmap('RdBu') 1390 plt.colorbar() 1391 plt.draw() 1392 1393 if correlation is True: 1394 return corr 1395 else: 1396 return cov 1397 1398 1399def _smooth_eigenvalues(corr, E): 1400 """Eigenvalue smoothing as described in hep-lat/9412087 1401 1402 corr : np.ndarray 1403 correlation matrix 1404 E : integer 1405 Number of eigenvalues to be left substantially unchanged 1406 """ 1407 if not (2 < E < corr.shape[0] - 1): 1408 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1409 vals, vec = np.linalg.eigh(corr) 1410 lambda_min = np.mean(vals[:-E]) 1411 vals[vals < lambda_min] = lambda_min 1412 vals /= np.mean(vals) 1413 return vec @ np.diag(vals) @ vec.T 1414 1415 1416def _covariance_element(obs1, obs2): 1417 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1418 1419 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1420 deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1421 deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1422 return np.sum(deltas1 * deltas2) 1423 1424 if set(obs1.names).isdisjoint(set(obs2.names)): 1425 return 0.0 1426 1427 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1428 raise Exception('The gamma method has to be applied to both Obs first.') 1429 1430 dvalue = 0.0 1431 1432 for e_name in obs1.mc_names: 1433 1434 if e_name not in obs2.mc_names: 1435 continue 1436 1437 idl_d = {} 1438 for r_name in obs1.e_content[e_name]: 1439 if r_name not in obs2.e_content[e_name]: 1440 continue 1441 idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1442 1443 gamma = 0.0 1444 1445 for r_name in obs1.e_content[e_name]: 1446 if r_name not in obs2.e_content[e_name]: 1447 continue 1448 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1449 1450 if gamma == 0.0: 1451 continue 1452 1453 gamma_div = 0.0 1454 e_N = 0 1455 for r_name in obs1.e_content[e_name]: 1456 if r_name not in obs2.e_content[e_name]: 1457 continue 1458 gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1459 e_N += len(idl_d[r_name]) 1460 gamma /= max(gamma_div, 1.0) 1461 1462 # Bias correction hep-lat/0306017 eq. (49) 1463 dvalue += (1 + 1 / e_N) * gamma / e_N 1464 1465 for e_name in obs1.cov_names: 1466 1467 if e_name not in obs2.cov_names: 1468 continue 1469 1470 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1471 1472 return dvalue 1473 1474 1475def import_jackknife(jacks, name, idl=None): 1476 """Imports jackknife samples and returns an Obs 1477 1478 Parameters 1479 ---------- 1480 jacks : numpy.ndarray 1481 numpy array containing the mean value as zeroth entry and 1482 the N jackknife samples as first to Nth entry. 1483 name : str 1484 name of the ensemble the samples are defined on. 1485 """ 1486 length = len(jacks) - 1 1487 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1488 samples = jacks[1:] @ prj 1489 mean = np.mean(samples) 1490 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1491 new_obs._value = jacks[0] 1492 return new_obs 1493 1494 1495def merge_obs(list_of_obs): 1496 """Combine all observables in list_of_obs into one new observable 1497 1498 Parameters 1499 ---------- 1500 list_of_obs : list 1501 list of the Obs object to be combined 1502 1503 Notes 1504 ----- 1505 It is not possible to combine obs which are based on the same replicum 1506 """ 1507 replist = [item for obs in list_of_obs for item in obs.names] 1508 if (len(replist) == len(set(replist))) is False: 1509 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1510 if any([len(o.cov_names) for o in list_of_obs]): 1511 raise Exception('Not possible to merge data that contains covobs!') 1512 new_dict = {} 1513 idl_dict = {} 1514 for o in list_of_obs: 1515 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1516 for key in set(o.deltas) | set(o.r_values)}) 1517 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1518 1519 names = sorted(new_dict.keys()) 1520 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1521 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1522 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1523 return o 1524 1525 1526def cov_Obs(means, cov, name, grad=None): 1527 """Create an Obs based on mean(s) and a covariance matrix 1528 1529 Parameters 1530 ---------- 1531 mean : list of floats or float 1532 N mean value(s) of the new Obs 1533 cov : list or array 1534 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1535 name : str 1536 identifier for the covariance matrix 1537 grad : list or array 1538 Gradient of the Covobs wrt. the means belonging to cov. 1539 """ 1540 1541 def covobs_to_obs(co): 1542 """Make an Obs out of a Covobs 1543 1544 Parameters 1545 ---------- 1546 co : Covobs 1547 Covobs to be embedded into the Obs 1548 """ 1549 o = Obs([], [], means=[]) 1550 o._value = co.value 1551 o.names.append(co.name) 1552 o._covobs[co.name] = co 1553 o._dvalue = np.sqrt(co.errsq()) 1554 return o 1555 1556 ol = [] 1557 if isinstance(means, (float, int)): 1558 means = [means] 1559 1560 for i in range(len(means)): 1561 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1562 if ol[0].covobs[name].N != len(means): 1563 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1564 if len(ol) == 1: 1565 return ol[0] 1566 return ol
View Source
13class Obs: 14 """Class for a general observable. 15 16 Instances of Obs are the basic objects of a pyerrors error analysis. 17 They are initialized with a list which contains arrays of samples for 18 different ensembles/replica and another list of same length which contains 19 the names of the ensembles/replica. Mathematical operations can be 20 performed on instances. The result is another instance of Obs. The error of 21 an instance can be computed with the gamma_method. Also contains additional 22 methods for output and visualization of the error calculation. 23 24 Attributes 25 ---------- 26 S_global : float 27 Standard value for S (default 2.0) 28 S_dict : dict 29 Dictionary for S values. If an entry for a given ensemble 30 exists this overwrites the standard value for that ensemble. 31 tau_exp_global : float 32 Standard value for tau_exp (default 0.0) 33 tau_exp_dict : dict 34 Dictionary for tau_exp values. If an entry for a given ensemble exists 35 this overwrites the standard value for that ensemble. 36 N_sigma_global : float 37 Standard value for N_sigma (default 1.0) 38 N_sigma_dict : dict 39 Dictionary for N_sigma values. If an entry for a given ensemble exists 40 this overwrites the standard value for that ensemble. 41 """ 42 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 43 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 44 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 45 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 46 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 47 48 S_global = 2.0 49 S_dict = {} 50 tau_exp_global = 0.0 51 tau_exp_dict = {} 52 N_sigma_global = 1.0 53 N_sigma_dict = {} 54 filter_eps = 1e-10 55 56 def __init__(self, samples, names, idl=None, **kwargs): 57 """ Initialize Obs object. 58 59 Parameters 60 ---------- 61 samples : list 62 list of numpy arrays containing the Monte Carlo samples 63 names : list 64 list of strings labeling the individual samples 65 idl : list, optional 66 list of ranges or lists on which the samples are defined 67 """ 68 69 if kwargs.get("means") is None and len(samples): 70 if len(samples) != len(names): 71 raise Exception('Length of samples and names incompatible.') 72 if idl is not None: 73 if len(idl) != len(names): 74 raise Exception('Length of idl incompatible with samples and names.') 75 name_length = len(names) 76 if name_length > 1: 77 if name_length != len(set(names)): 78 raise Exception('names are not unique.') 79 if not all(isinstance(x, str) for x in names): 80 raise TypeError('All names have to be strings.') 81 else: 82 if not isinstance(names[0], str): 83 raise TypeError('All names have to be strings.') 84 if min(len(x) for x in samples) <= 4: 85 raise Exception('Samples have to have at least 5 entries.') 86 87 self.names = sorted(names) 88 self.shape = {} 89 self.r_values = {} 90 self.deltas = {} 91 self._covobs = {} 92 93 self._value = 0 94 self.N = 0 95 self.is_merged = {} 96 self.idl = {} 97 if idl is not None: 98 for name, idx in sorted(zip(names, idl)): 99 if isinstance(idx, range): 100 self.idl[name] = idx 101 elif isinstance(idx, (list, np.ndarray)): 102 dc = np.unique(np.diff(idx)) 103 if np.any(dc < 0): 104 raise Exception("Unsorted idx for idl[%s]" % (name)) 105 if len(dc) == 1: 106 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 107 else: 108 self.idl[name] = list(idx) 109 else: 110 raise Exception('incompatible type for idl[%s].' % (name)) 111 else: 112 for name, sample in sorted(zip(names, samples)): 113 self.idl[name] = range(1, len(sample) + 1) 114 115 if kwargs.get("means") is not None: 116 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 117 self.shape[name] = len(self.idl[name]) 118 self.N += self.shape[name] 119 self.r_values[name] = mean 120 self.deltas[name] = sample 121 else: 122 for name, sample in sorted(zip(names, samples)): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 if len(sample) != self.shape[name]: 126 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 127 self.r_values[name] = np.mean(sample) 128 self.deltas[name] = sample - self.r_values[name] 129 self._value += self.shape[name] * self.r_values[name] 130 self._value /= self.N 131 132 self._dvalue = 0.0 133 self.ddvalue = 0.0 134 self.reweighted = False 135 136 self.tag = None 137 138 @property 139 def value(self): 140 return self._value 141 142 @property 143 def dvalue(self): 144 return self._dvalue 145 146 @property 147 def e_names(self): 148 return sorted(set([o.split('|')[0] for o in self.names])) 149 150 @property 151 def cov_names(self): 152 return sorted(set([o for o in self.covobs.keys()])) 153 154 @property 155 def mc_names(self): 156 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 157 158 @property 159 def e_content(self): 160 res = {} 161 for e, e_name in enumerate(self.e_names): 162 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 163 if e_name in self.names: 164 res[e_name].append(e_name) 165 return res 166 167 @property 168 def covobs(self): 169 return self._covobs 170 171 def gamma_method(self, **kwargs): 172 """Estimate the error and related properties of the Obs. 173 174 Parameters 175 ---------- 176 S : float 177 specifies a custom value for the parameter S (default 2.0). 178 If set to 0 it is assumed that the data exhibits no 179 autocorrelation. In this case the error estimates coincides 180 with the sample standard error. 181 tau_exp : float 182 positive value triggers the critical slowing down analysis 183 (default 0.0). 184 N_sigma : float 185 number of standard deviations from zero until the tail is 186 attached to the autocorrelation function (default 1). 187 fft : bool 188 determines whether the fft algorithm is used for the computation 189 of the autocorrelation function (default True) 190 """ 191 192 e_content = self.e_content 193 self.e_dvalue = {} 194 self.e_ddvalue = {} 195 self.e_tauint = {} 196 self.e_dtauint = {} 197 self.e_windowsize = {} 198 self.e_n_tauint = {} 199 self.e_n_dtauint = {} 200 e_gamma = {} 201 self.e_rho = {} 202 self.e_drho = {} 203 self._dvalue = 0 204 self.ddvalue = 0 205 206 self.S = {} 207 self.tau_exp = {} 208 self.N_sigma = {} 209 210 if kwargs.get('fft') is False: 211 fft = False 212 else: 213 fft = True 214 215 def _parse_kwarg(kwarg_name): 216 if kwarg_name in kwargs: 217 tmp = kwargs.get(kwarg_name) 218 if isinstance(tmp, (int, float)): 219 if tmp < 0: 220 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 221 for e, e_name in enumerate(self.e_names): 222 getattr(self, kwarg_name)[e_name] = tmp 223 else: 224 raise TypeError(kwarg_name + ' is not in proper format.') 225 else: 226 for e, e_name in enumerate(self.e_names): 227 if e_name in getattr(Obs, kwarg_name + '_dict'): 228 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 229 else: 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 231 232 _parse_kwarg('S') 233 _parse_kwarg('tau_exp') 234 _parse_kwarg('N_sigma') 235 236 for e, e_name in enumerate(self.mc_names): 237 r_length = [] 238 for r_name in e_content[e_name]: 239 if isinstance(self.idl[r_name], range): 240 r_length.append(len(self.idl[r_name])) 241 else: 242 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 243 244 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 245 w_max = max(r_length) // 2 246 e_gamma[e_name] = np.zeros(w_max) 247 self.e_rho[e_name] = np.zeros(w_max) 248 self.e_drho[e_name] = np.zeros(w_max) 249 250 for r_name in e_content[e_name]: 251 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 252 253 gamma_div = np.zeros(w_max) 254 for r_name in e_content[e_name]: 255 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 256 gamma_div[gamma_div < 1] = 1.0 257 e_gamma[e_name] /= gamma_div[:w_max] 258 259 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 260 self.e_tauint[e_name] = 0.5 261 self.e_dtauint[e_name] = 0.0 262 self.e_dvalue[e_name] = 0.0 263 self.e_ddvalue[e_name] = 0.0 264 self.e_windowsize[e_name] = 0 265 continue 266 267 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 268 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 269 # Make sure no entry of tauint is smaller than 0.5 270 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 271 # hep-lat/0306017 eq. (42) 272 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 273 self.e_n_dtauint[e_name][0] = 0.0 274 275 def _compute_drho(i): 276 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 277 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 278 279 _compute_drho(1) 280 if self.tau_exp[e_name] > 0: 281 texp = self.tau_exp[e_name] 282 # Critical slowing down analysis 283 if w_max // 2 <= 1: 284 raise Exception("Need at least 8 samples for tau_exp error analysis") 285 for n in range(1, w_max // 2): 286 _compute_drho(n + 1) 287 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 288 # Bias correction hep-lat/0306017 eq. (49) included 289 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 290 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 291 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 292 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 293 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 294 self.e_windowsize[e_name] = n 295 break 296 else: 297 if self.S[e_name] == 0.0: 298 self.e_tauint[e_name] = 0.5 299 self.e_dtauint[e_name] = 0.0 300 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 302 self.e_windowsize[e_name] = 0 303 else: 304 # Standard automatic windowing procedure 305 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 306 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 307 for n in range(1, w_max): 308 if n < w_max // 2 - 2: 309 _compute_drho(n + 1) 310 if g_w[n - 1] < 0 or n >= w_max - 1: 311 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 312 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 313 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 314 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 315 self.e_windowsize[e_name] = n 316 break 317 318 self._dvalue += self.e_dvalue[e_name] ** 2 319 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 320 321 for e_name in self.cov_names: 322 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 323 self.e_ddvalue[e_name] = 0 324 self._dvalue += self.e_dvalue[e_name]**2 325 326 self._dvalue = np.sqrt(self._dvalue) 327 if self._dvalue == 0.0: 328 self.ddvalue = 0.0 329 else: 330 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 331 return 332 333 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 334 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 335 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 336 337 Parameters 338 ---------- 339 deltas : list 340 List of fluctuations 341 idx : list 342 List or range of configurations on which the deltas are defined. 343 shape : int 344 Number of configurations in idx. 345 w_max : int 346 Upper bound for the summation window. 347 fft : bool 348 determines whether the fft algorithm is used for the computation 349 of the autocorrelation function. 350 """ 351 gamma = np.zeros(w_max) 352 deltas = _expand_deltas(deltas, idx, shape) 353 new_shape = len(deltas) 354 if fft: 355 max_gamma = min(new_shape, w_max) 356 # The padding for the fft has to be even 357 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 358 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 359 else: 360 for n in range(w_max): 361 if new_shape - n >= 0: 362 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 363 364 return gamma 365 366 def details(self, ens_content=True): 367 """Output detailed properties of the Obs. 368 369 Parameters 370 ---------- 371 ens_content : bool 372 print details about the ensembles and replica if true. 373 """ 374 if self.tag is not None: 375 print("Description:", self.tag) 376 if not hasattr(self, 'e_dvalue'): 377 print('Result\t %3.8e' % (self.value)) 378 else: 379 if self.value == 0.0: 380 percentage = np.nan 381 else: 382 percentage = np.abs(self._dvalue / self.value) * 100 383 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 384 if len(self.e_names) > 1: 385 print(' Ensemble errors:') 386 for e_name in self.mc_names: 387 if len(self.e_names) > 1: 388 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 389 if self.tau_exp[e_name] > 0: 390 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 391 else: 392 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 393 for e_name in self.cov_names: 394 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 395 if ens_content is True: 396 if len(self.e_names) == 1: 397 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 398 else: 399 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 400 my_string_list = [] 401 for key, value in sorted(self.e_content.items()): 402 if key not in self.covobs: 403 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 404 if len(value) == 1: 405 my_string += f': {self.shape[value[0]]} configurations' 406 if isinstance(self.idl[value[0]], range): 407 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 408 else: 409 my_string += ' (irregular range)' 410 else: 411 sublist = [] 412 for v in value: 413 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 414 my_substring += f': {self.shape[v]} configurations' 415 if isinstance(self.idl[v], range): 416 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 417 else: 418 my_substring += ' (irregular range)' 419 sublist.append(my_substring) 420 421 my_string += '\n' + '\n'.join(sublist) 422 else: 423 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 424 my_string_list.append(my_string) 425 print('\n'.join(my_string_list)) 426 427 def is_zero_within_error(self, sigma=1): 428 """Checks whether the observable is zero within 'sigma' standard errors. 429 430 Parameters 431 ---------- 432 sigma : int 433 Number of standard errors used for the check. 434 435 Works only properly when the gamma method was run. 436 """ 437 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 438 439 def is_zero(self, atol=1e-10): 440 """Checks whether the observable is zero within a given tolerance. 441 442 Parameters 443 ---------- 444 atol : float 445 Absolute tolerance (for details see numpy documentation). 446 """ 447 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 448 449 def plot_tauint(self, save=None): 450 """Plot integrated autocorrelation time for each ensemble. 451 452 Parameters 453 ---------- 454 save : str 455 saves the figure to a file named 'save' if. 456 """ 457 if not hasattr(self, 'e_dvalue'): 458 raise Exception('Run the gamma method first.') 459 460 for e, e_name in enumerate(self.mc_names): 461 fig = plt.figure() 462 plt.xlabel(r'$W$') 463 plt.ylabel(r'$\tau_\mathrm{int}$') 464 length = int(len(self.e_n_tauint[e_name])) 465 if self.tau_exp[e_name] > 0: 466 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 467 x_help = np.arange(2 * self.tau_exp[e_name]) 468 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 469 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 470 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 471 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 472 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 473 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 474 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 475 else: 476 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 477 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 478 479 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 480 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 481 plt.legend() 482 plt.xlim(-0.5, xmax) 483 ylim = plt.ylim() 484 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 485 plt.draw() 486 if save: 487 fig.savefig(save + "_" + str(e)) 488 489 def plot_rho(self, save=None): 490 """Plot normalized autocorrelation function time for each ensemble. 491 492 Parameters 493 ---------- 494 save : str 495 saves the figure to a file named 'save' if. 496 """ 497 if not hasattr(self, 'e_dvalue'): 498 raise Exception('Run the gamma method first.') 499 for e, e_name in enumerate(self.mc_names): 500 fig = plt.figure() 501 plt.xlabel('W') 502 plt.ylabel('rho') 503 length = int(len(self.e_drho[e_name])) 504 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 505 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 506 if self.tau_exp[e_name] > 0: 507 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 508 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 509 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 510 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 511 else: 512 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 513 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 514 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 515 plt.xlim(-0.5, xmax) 516 plt.draw() 517 if save: 518 fig.savefig(save + "_" + str(e)) 519 520 def plot_rep_dist(self): 521 """Plot replica distribution for each ensemble with more than one replicum.""" 522 if not hasattr(self, 'e_dvalue'): 523 raise Exception('Run the gamma method first.') 524 for e, e_name in enumerate(self.mc_names): 525 if len(self.e_content[e_name]) == 1: 526 print('No replica distribution for a single replicum (', e_name, ')') 527 continue 528 r_length = [] 529 sub_r_mean = 0 530 for r, r_name in enumerate(self.e_content[e_name]): 531 r_length.append(len(self.deltas[r_name])) 532 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 533 e_N = np.sum(r_length) 534 sub_r_mean /= e_N 535 arr = np.zeros(len(self.e_content[e_name])) 536 for r, r_name in enumerate(self.e_content[e_name]): 537 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 538 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 539 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 540 plt.draw() 541 542 def plot_history(self, expand=True): 543 """Plot derived Monte Carlo history for each ensemble 544 545 Parameters 546 ---------- 547 expand : bool 548 show expanded history for irregular Monte Carlo chains (default: True). 549 """ 550 for e, e_name in enumerate(self.mc_names): 551 plt.figure() 552 r_length = [] 553 tmp = [] 554 tmp_expanded = [] 555 for r, r_name in enumerate(self.e_content[e_name]): 556 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 557 if expand: 558 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 559 r_length.append(len(tmp_expanded[-1])) 560 else: 561 r_length.append(len(tmp[-1])) 562 e_N = np.sum(r_length) 563 x = np.arange(e_N) 564 y_test = np.concatenate(tmp, axis=0) 565 if expand: 566 y = np.concatenate(tmp_expanded, axis=0) 567 else: 568 y = y_test 569 plt.errorbar(x, y, fmt='.', markersize=3) 570 plt.xlim(-0.5, e_N - 0.5) 571 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 572 plt.draw() 573 574 def plot_piechart(self): 575 """Plot piechart which shows the fractional contribution of each 576 ensemble to the error and returns a dictionary containing the fractions.""" 577 if not hasattr(self, 'e_dvalue'): 578 raise Exception('Run the gamma method first.') 579 if np.isclose(0.0, self._dvalue, atol=1e-15): 580 raise Exception('Error is 0.0') 581 labels = self.e_names 582 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 583 fig1, ax1 = plt.subplots() 584 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 585 ax1.axis('equal') 586 plt.draw() 587 588 return dict(zip(self.e_names, sizes)) 589 590 def dump(self, filename, datatype="json.gz", description="", **kwargs): 591 """Dump the Obs to a file 'name' of chosen format. 592 593 Parameters 594 ---------- 595 filename : str 596 name of the file to be saved. 597 datatype : str 598 Format of the exported file. Supported formats include 599 "json.gz" and "pickle" 600 description : str 601 Description for output file, only relevant for json.gz format. 602 path : str 603 specifies a custom path for the file (default '.') 604 """ 605 if 'path' in kwargs: 606 file_name = kwargs.get('path') + '/' + filename 607 else: 608 file_name = filename 609 610 if datatype == "json.gz": 611 from .input.json import dump_to_json 612 dump_to_json([self], file_name, description=description) 613 elif datatype == "pickle": 614 with open(file_name + '.p', 'wb') as fb: 615 pickle.dump(self, fb) 616 else: 617 raise Exception("Unknown datatype " + str(datatype)) 618 619 def export_jackknife(self): 620 """Export jackknife samples from the Obs 621 622 Returns 623 ------- 624 numpy.ndarray 625 Returns a numpy array of length N + 1 where N is the number of samples 626 for the given ensemble and replicum. The zeroth entry of the array contains 627 the mean value of the Obs, entries 1 to N contain the N jackknife samples 628 derived from the Obs. The current implementation only works for observables 629 defined on exactly one ensemble and replicum. The derived jackknife samples 630 should agree with samples from a full jackknife analysis up to O(1/N). 631 """ 632 633 if len(self.names) != 1: 634 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 635 636 name = self.names[0] 637 full_data = self.deltas[name] + self.r_values[name] 638 n = full_data.size 639 mean = self.value 640 tmp_jacks = np.zeros(n + 1) 641 tmp_jacks[0] = mean 642 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 643 return tmp_jacks 644 645 def __float__(self): 646 return float(self.value) 647 648 def __repr__(self): 649 return 'Obs[' + str(self) + ']' 650 651 def __str__(self): 652 if self._dvalue == 0.0: 653 return str(self.value) 654 fexp = np.floor(np.log10(self._dvalue)) 655 if fexp < 0.0: 656 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 657 elif fexp == 0.0: 658 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 659 else: 660 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 661 662 # Overload comparisons 663 def __lt__(self, other): 664 return self.value < other 665 666 def __le__(self, other): 667 return self.value <= other 668 669 def __gt__(self, other): 670 return self.value > other 671 672 def __ge__(self, other): 673 return self.value >= other 674 675 def __eq__(self, other): 676 return (self - other).is_zero() 677 678 def __ne__(self, other): 679 return not (self - other).is_zero() 680 681 # Overload math operations 682 def __add__(self, y): 683 if isinstance(y, Obs): 684 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 685 else: 686 if isinstance(y, np.ndarray): 687 return np.array([self + o for o in y]) 688 elif y.__class__.__name__ in ['Corr', 'CObs']: 689 return NotImplemented 690 else: 691 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 692 693 def __radd__(self, y): 694 return self + y 695 696 def __mul__(self, y): 697 if isinstance(y, Obs): 698 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 699 else: 700 if isinstance(y, np.ndarray): 701 return np.array([self * o for o in y]) 702 elif isinstance(y, complex): 703 return CObs(self * y.real, self * y.imag) 704 elif y.__class__.__name__ in ['Corr', 'CObs']: 705 return NotImplemented 706 else: 707 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 708 709 def __rmul__(self, y): 710 return self * y 711 712 def __sub__(self, y): 713 if isinstance(y, Obs): 714 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 715 else: 716 if isinstance(y, np.ndarray): 717 return np.array([self - o for o in y]) 718 elif y.__class__.__name__ in ['Corr', 'CObs']: 719 return NotImplemented 720 else: 721 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 722 723 def __rsub__(self, y): 724 return -1 * (self - y) 725 726 def __pos__(self): 727 return self 728 729 def __neg__(self): 730 return -1 * self 731 732 def __truediv__(self, y): 733 if isinstance(y, Obs): 734 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 735 else: 736 if isinstance(y, np.ndarray): 737 return np.array([self / o for o in y]) 738 elif y.__class__.__name__ in ['Corr', 'CObs']: 739 return NotImplemented 740 else: 741 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 742 743 def __rtruediv__(self, y): 744 if isinstance(y, Obs): 745 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 746 else: 747 if isinstance(y, np.ndarray): 748 return np.array([o / self for o in y]) 749 elif y.__class__.__name__ in ['Corr', 'CObs']: 750 return NotImplemented 751 else: 752 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 753 754 def __pow__(self, y): 755 if isinstance(y, Obs): 756 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 757 else: 758 return derived_observable(lambda x: x[0] ** y, [self]) 759 760 def __rpow__(self, y): 761 if isinstance(y, Obs): 762 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 763 else: 764 return derived_observable(lambda x: y ** x[0], [self]) 765 766 def __abs__(self): 767 return derived_observable(lambda x: anp.abs(x[0]), [self]) 768 769 # Overload numpy functions 770 def sqrt(self): 771 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 772 773 def log(self): 774 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 775 776 def exp(self): 777 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 778 779 def sin(self): 780 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 781 782 def cos(self): 783 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 784 785 def tan(self): 786 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 787 788 def arcsin(self): 789 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 790 791 def arccos(self): 792 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 793 794 def arctan(self): 795 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 796 797 def sinh(self): 798 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 799 800 def cosh(self): 801 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 802 803 def tanh(self): 804 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 805 806 def arcsinh(self): 807 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 808 809 def arccosh(self): 810 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 811 812 def arctanh(self): 813 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
View Source
56 def __init__(self, samples, names, idl=None, **kwargs): 57 """ Initialize Obs object. 58 59 Parameters 60 ---------- 61 samples : list 62 list of numpy arrays containing the Monte Carlo samples 63 names : list 64 list of strings labeling the individual samples 65 idl : list, optional 66 list of ranges or lists on which the samples are defined 67 """ 68 69 if kwargs.get("means") is None and len(samples): 70 if len(samples) != len(names): 71 raise Exception('Length of samples and names incompatible.') 72 if idl is not None: 73 if len(idl) != len(names): 74 raise Exception('Length of idl incompatible with samples and names.') 75 name_length = len(names) 76 if name_length > 1: 77 if name_length != len(set(names)): 78 raise Exception('names are not unique.') 79 if not all(isinstance(x, str) for x in names): 80 raise TypeError('All names have to be strings.') 81 else: 82 if not isinstance(names[0], str): 83 raise TypeError('All names have to be strings.') 84 if min(len(x) for x in samples) <= 4: 85 raise Exception('Samples have to have at least 5 entries.') 86 87 self.names = sorted(names) 88 self.shape = {} 89 self.r_values = {} 90 self.deltas = {} 91 self._covobs = {} 92 93 self._value = 0 94 self.N = 0 95 self.is_merged = {} 96 self.idl = {} 97 if idl is not None: 98 for name, idx in sorted(zip(names, idl)): 99 if isinstance(idx, range): 100 self.idl[name] = idx 101 elif isinstance(idx, (list, np.ndarray)): 102 dc = np.unique(np.diff(idx)) 103 if np.any(dc < 0): 104 raise Exception("Unsorted idx for idl[%s]" % (name)) 105 if len(dc) == 1: 106 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 107 else: 108 self.idl[name] = list(idx) 109 else: 110 raise Exception('incompatible type for idl[%s].' % (name)) 111 else: 112 for name, sample in sorted(zip(names, samples)): 113 self.idl[name] = range(1, len(sample) + 1) 114 115 if kwargs.get("means") is not None: 116 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 117 self.shape[name] = len(self.idl[name]) 118 self.N += self.shape[name] 119 self.r_values[name] = mean 120 self.deltas[name] = sample 121 else: 122 for name, sample in sorted(zip(names, samples)): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 if len(sample) != self.shape[name]: 126 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 127 self.r_values[name] = np.mean(sample) 128 self.deltas[name] = sample - self.r_values[name] 129 self._value += self.shape[name] * self.r_values[name] 130 self._value /= self.N 131 132 self._dvalue = 0.0 133 self.ddvalue = 0.0 134 self.reweighted = False 135 136 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
View Source
171 def gamma_method(self, **kwargs): 172 """Estimate the error and related properties of the Obs. 173 174 Parameters 175 ---------- 176 S : float 177 specifies a custom value for the parameter S (default 2.0). 178 If set to 0 it is assumed that the data exhibits no 179 autocorrelation. In this case the error estimates coincides 180 with the sample standard error. 181 tau_exp : float 182 positive value triggers the critical slowing down analysis 183 (default 0.0). 184 N_sigma : float 185 number of standard deviations from zero until the tail is 186 attached to the autocorrelation function (default 1). 187 fft : bool 188 determines whether the fft algorithm is used for the computation 189 of the autocorrelation function (default True) 190 """ 191 192 e_content = self.e_content 193 self.e_dvalue = {} 194 self.e_ddvalue = {} 195 self.e_tauint = {} 196 self.e_dtauint = {} 197 self.e_windowsize = {} 198 self.e_n_tauint = {} 199 self.e_n_dtauint = {} 200 e_gamma = {} 201 self.e_rho = {} 202 self.e_drho = {} 203 self._dvalue = 0 204 self.ddvalue = 0 205 206 self.S = {} 207 self.tau_exp = {} 208 self.N_sigma = {} 209 210 if kwargs.get('fft') is False: 211 fft = False 212 else: 213 fft = True 214 215 def _parse_kwarg(kwarg_name): 216 if kwarg_name in kwargs: 217 tmp = kwargs.get(kwarg_name) 218 if isinstance(tmp, (int, float)): 219 if tmp < 0: 220 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 221 for e, e_name in enumerate(self.e_names): 222 getattr(self, kwarg_name)[e_name] = tmp 223 else: 224 raise TypeError(kwarg_name + ' is not in proper format.') 225 else: 226 for e, e_name in enumerate(self.e_names): 227 if e_name in getattr(Obs, kwarg_name + '_dict'): 228 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 229 else: 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 231 232 _parse_kwarg('S') 233 _parse_kwarg('tau_exp') 234 _parse_kwarg('N_sigma') 235 236 for e, e_name in enumerate(self.mc_names): 237 r_length = [] 238 for r_name in e_content[e_name]: 239 if isinstance(self.idl[r_name], range): 240 r_length.append(len(self.idl[r_name])) 241 else: 242 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 243 244 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 245 w_max = max(r_length) // 2 246 e_gamma[e_name] = np.zeros(w_max) 247 self.e_rho[e_name] = np.zeros(w_max) 248 self.e_drho[e_name] = np.zeros(w_max) 249 250 for r_name in e_content[e_name]: 251 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 252 253 gamma_div = np.zeros(w_max) 254 for r_name in e_content[e_name]: 255 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 256 gamma_div[gamma_div < 1] = 1.0 257 e_gamma[e_name] /= gamma_div[:w_max] 258 259 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 260 self.e_tauint[e_name] = 0.5 261 self.e_dtauint[e_name] = 0.0 262 self.e_dvalue[e_name] = 0.0 263 self.e_ddvalue[e_name] = 0.0 264 self.e_windowsize[e_name] = 0 265 continue 266 267 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 268 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 269 # Make sure no entry of tauint is smaller than 0.5 270 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 271 # hep-lat/0306017 eq. (42) 272 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 273 self.e_n_dtauint[e_name][0] = 0.0 274 275 def _compute_drho(i): 276 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 277 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 278 279 _compute_drho(1) 280 if self.tau_exp[e_name] > 0: 281 texp = self.tau_exp[e_name] 282 # Critical slowing down analysis 283 if w_max // 2 <= 1: 284 raise Exception("Need at least 8 samples for tau_exp error analysis") 285 for n in range(1, w_max // 2): 286 _compute_drho(n + 1) 287 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 288 # Bias correction hep-lat/0306017 eq. (49) included 289 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 290 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 291 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 292 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 293 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 294 self.e_windowsize[e_name] = n 295 break 296 else: 297 if self.S[e_name] == 0.0: 298 self.e_tauint[e_name] = 0.5 299 self.e_dtauint[e_name] = 0.0 300 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 302 self.e_windowsize[e_name] = 0 303 else: 304 # Standard automatic windowing procedure 305 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 306 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 307 for n in range(1, w_max): 308 if n < w_max // 2 - 2: 309 _compute_drho(n + 1) 310 if g_w[n - 1] < 0 or n >= w_max - 1: 311 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 312 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 313 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 314 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 315 self.e_windowsize[e_name] = n 316 break 317 318 self._dvalue += self.e_dvalue[e_name] ** 2 319 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 320 321 for e_name in self.cov_names: 322 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 323 self.e_ddvalue[e_name] = 0 324 self._dvalue += self.e_dvalue[e_name]**2 325 326 self._dvalue = np.sqrt(self._dvalue) 327 if self._dvalue == 0.0: 328 self.ddvalue = 0.0 329 else: 330 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 331 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
View Source
366 def details(self, ens_content=True): 367 """Output detailed properties of the Obs. 368 369 Parameters 370 ---------- 371 ens_content : bool 372 print details about the ensembles and replica if true. 373 """ 374 if self.tag is not None: 375 print("Description:", self.tag) 376 if not hasattr(self, 'e_dvalue'): 377 print('Result\t %3.8e' % (self.value)) 378 else: 379 if self.value == 0.0: 380 percentage = np.nan 381 else: 382 percentage = np.abs(self._dvalue / self.value) * 100 383 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 384 if len(self.e_names) > 1: 385 print(' Ensemble errors:') 386 for e_name in self.mc_names: 387 if len(self.e_names) > 1: 388 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 389 if self.tau_exp[e_name] > 0: 390 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 391 else: 392 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 393 for e_name in self.cov_names: 394 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 395 if ens_content is True: 396 if len(self.e_names) == 1: 397 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 398 else: 399 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 400 my_string_list = [] 401 for key, value in sorted(self.e_content.items()): 402 if key not in self.covobs: 403 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 404 if len(value) == 1: 405 my_string += f': {self.shape[value[0]]} configurations' 406 if isinstance(self.idl[value[0]], range): 407 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 408 else: 409 my_string += ' (irregular range)' 410 else: 411 sublist = [] 412 for v in value: 413 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 414 my_substring += f': {self.shape[v]} configurations' 415 if isinstance(self.idl[v], range): 416 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 417 else: 418 my_substring += ' (irregular range)' 419 sublist.append(my_substring) 420 421 my_string += '\n' + '\n'.join(sublist) 422 else: 423 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 424 my_string_list.append(my_string) 425 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
View Source
427 def is_zero_within_error(self, sigma=1): 428 """Checks whether the observable is zero within 'sigma' standard errors. 429 430 Parameters 431 ---------- 432 sigma : int 433 Number of standard errors used for the check. 434 435 Works only properly when the gamma method was run. 436 """ 437 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
View Source
439 def is_zero(self, atol=1e-10): 440 """Checks whether the observable is zero within a given tolerance. 441 442 Parameters 443 ---------- 444 atol : float 445 Absolute tolerance (for details see numpy documentation). 446 """ 447 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
View Source
449 def plot_tauint(self, save=None): 450 """Plot integrated autocorrelation time for each ensemble. 451 452 Parameters 453 ---------- 454 save : str 455 saves the figure to a file named 'save' if. 456 """ 457 if not hasattr(self, 'e_dvalue'): 458 raise Exception('Run the gamma method first.') 459 460 for e, e_name in enumerate(self.mc_names): 461 fig = plt.figure() 462 plt.xlabel(r'$W$') 463 plt.ylabel(r'$\tau_\mathrm{int}$') 464 length = int(len(self.e_n_tauint[e_name])) 465 if self.tau_exp[e_name] > 0: 466 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 467 x_help = np.arange(2 * self.tau_exp[e_name]) 468 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 469 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 470 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 471 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 472 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 473 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 474 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 475 else: 476 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 477 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 478 479 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 480 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 481 plt.legend() 482 plt.xlim(-0.5, xmax) 483 ylim = plt.ylim() 484 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 485 plt.draw() 486 if save: 487 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
View Source
489 def plot_rho(self, save=None): 490 """Plot normalized autocorrelation function time for each ensemble. 491 492 Parameters 493 ---------- 494 save : str 495 saves the figure to a file named 'save' if. 496 """ 497 if not hasattr(self, 'e_dvalue'): 498 raise Exception('Run the gamma method first.') 499 for e, e_name in enumerate(self.mc_names): 500 fig = plt.figure() 501 plt.xlabel('W') 502 plt.ylabel('rho') 503 length = int(len(self.e_drho[e_name])) 504 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 505 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 506 if self.tau_exp[e_name] > 0: 507 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 508 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 509 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 510 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 511 else: 512 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 513 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 514 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 515 plt.xlim(-0.5, xmax) 516 plt.draw() 517 if save: 518 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
View Source
520 def plot_rep_dist(self): 521 """Plot replica distribution for each ensemble with more than one replicum.""" 522 if not hasattr(self, 'e_dvalue'): 523 raise Exception('Run the gamma method first.') 524 for e, e_name in enumerate(self.mc_names): 525 if len(self.e_content[e_name]) == 1: 526 print('No replica distribution for a single replicum (', e_name, ')') 527 continue 528 r_length = [] 529 sub_r_mean = 0 530 for r, r_name in enumerate(self.e_content[e_name]): 531 r_length.append(len(self.deltas[r_name])) 532 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 533 e_N = np.sum(r_length) 534 sub_r_mean /= e_N 535 arr = np.zeros(len(self.e_content[e_name])) 536 for r, r_name in enumerate(self.e_content[e_name]): 537 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 538 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 539 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 540 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
View Source
542 def plot_history(self, expand=True): 543 """Plot derived Monte Carlo history for each ensemble 544 545 Parameters 546 ---------- 547 expand : bool 548 show expanded history for irregular Monte Carlo chains (default: True). 549 """ 550 for e, e_name in enumerate(self.mc_names): 551 plt.figure() 552 r_length = [] 553 tmp = [] 554 tmp_expanded = [] 555 for r, r_name in enumerate(self.e_content[e_name]): 556 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 557 if expand: 558 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 559 r_length.append(len(tmp_expanded[-1])) 560 else: 561 r_length.append(len(tmp[-1])) 562 e_N = np.sum(r_length) 563 x = np.arange(e_N) 564 y_test = np.concatenate(tmp, axis=0) 565 if expand: 566 y = np.concatenate(tmp_expanded, axis=0) 567 else: 568 y = y_test 569 plt.errorbar(x, y, fmt='.', markersize=3) 570 plt.xlim(-0.5, e_N - 0.5) 571 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 572 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
View Source
574 def plot_piechart(self): 575 """Plot piechart which shows the fractional contribution of each 576 ensemble to the error and returns a dictionary containing the fractions.""" 577 if not hasattr(self, 'e_dvalue'): 578 raise Exception('Run the gamma method first.') 579 if np.isclose(0.0, self._dvalue, atol=1e-15): 580 raise Exception('Error is 0.0') 581 labels = self.e_names 582 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 583 fig1, ax1 = plt.subplots() 584 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 585 ax1.axis('equal') 586 plt.draw() 587 588 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
View Source
590 def dump(self, filename, datatype="json.gz", description="", **kwargs): 591 """Dump the Obs to a file 'name' of chosen format. 592 593 Parameters 594 ---------- 595 filename : str 596 name of the file to be saved. 597 datatype : str 598 Format of the exported file. Supported formats include 599 "json.gz" and "pickle" 600 description : str 601 Description for output file, only relevant for json.gz format. 602 path : str 603 specifies a custom path for the file (default '.') 604 """ 605 if 'path' in kwargs: 606 file_name = kwargs.get('path') + '/' + filename 607 else: 608 file_name = filename 609 610 if datatype == "json.gz": 611 from .input.json import dump_to_json 612 dump_to_json([self], file_name, description=description) 613 elif datatype == "pickle": 614 with open(file_name + '.p', 'wb') as fb: 615 pickle.dump(self, fb) 616 else: 617 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
View Source
619 def export_jackknife(self): 620 """Export jackknife samples from the Obs 621 622 Returns 623 ------- 624 numpy.ndarray 625 Returns a numpy array of length N + 1 where N is the number of samples 626 for the given ensemble and replicum. The zeroth entry of the array contains 627 the mean value of the Obs, entries 1 to N contain the N jackknife samples 628 derived from the Obs. The current implementation only works for observables 629 defined on exactly one ensemble and replicum. The derived jackknife samples 630 should agree with samples from a full jackknife analysis up to O(1/N). 631 """ 632 633 if len(self.names) != 1: 634 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 635 636 name = self.names[0] 637 full_data = self.deltas[name] + self.r_values[name] 638 n = full_data.size 639 mean = self.value 640 tmp_jacks = np.zeros(n + 1) 641 tmp_jacks[0] = mean 642 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 643 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
View Source
816class CObs: 817 """Class for a complex valued observable.""" 818 __slots__ = ['_real', '_imag', 'tag'] 819 820 def __init__(self, real, imag=0.0): 821 self._real = real 822 self._imag = imag 823 self.tag = None 824 825 @property 826 def real(self): 827 return self._real 828 829 @property 830 def imag(self): 831 return self._imag 832 833 def gamma_method(self, **kwargs): 834 """Executes the gamma_method for the real and the imaginary part.""" 835 if isinstance(self.real, Obs): 836 self.real.gamma_method(**kwargs) 837 if isinstance(self.imag, Obs): 838 self.imag.gamma_method(**kwargs) 839 840 def is_zero(self): 841 """Checks whether both real and imaginary part are zero within machine precision.""" 842 return self.real == 0.0 and self.imag == 0.0 843 844 def conjugate(self): 845 return CObs(self.real, -self.imag) 846 847 def __add__(self, other): 848 if isinstance(other, np.ndarray): 849 return other + self 850 elif hasattr(other, 'real') and hasattr(other, 'imag'): 851 return CObs(self.real + other.real, 852 self.imag + other.imag) 853 else: 854 return CObs(self.real + other, self.imag) 855 856 def __radd__(self, y): 857 return self + y 858 859 def __sub__(self, other): 860 if isinstance(other, np.ndarray): 861 return -1 * (other - self) 862 elif hasattr(other, 'real') and hasattr(other, 'imag'): 863 return CObs(self.real - other.real, self.imag - other.imag) 864 else: 865 return CObs(self.real - other, self.imag) 866 867 def __rsub__(self, other): 868 return -1 * (self - other) 869 870 def __mul__(self, other): 871 if isinstance(other, np.ndarray): 872 return other * self 873 elif hasattr(other, 'real') and hasattr(other, 'imag'): 874 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 875 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 876 [self.real, other.real, self.imag, other.imag], 877 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 878 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 879 [self.real, other.real, self.imag, other.imag], 880 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 881 elif getattr(other, 'imag', 0) != 0: 882 return CObs(self.real * other.real - self.imag * other.imag, 883 self.imag * other.real + self.real * other.imag) 884 else: 885 return CObs(self.real * other.real, self.imag * other.real) 886 else: 887 return CObs(self.real * other, self.imag * other) 888 889 def __rmul__(self, other): 890 return self * other 891 892 def __truediv__(self, other): 893 if isinstance(other, np.ndarray): 894 return 1 / (other / self) 895 elif hasattr(other, 'real') and hasattr(other, 'imag'): 896 r = other.real ** 2 + other.imag ** 2 897 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 898 else: 899 return CObs(self.real / other, self.imag / other) 900 901 def __rtruediv__(self, other): 902 r = self.real ** 2 + self.imag ** 2 903 if hasattr(other, 'real') and hasattr(other, 'imag'): 904 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 905 else: 906 return CObs(self.real * other / r, -self.imag * other / r) 907 908 def __abs__(self): 909 return np.sqrt(self.real**2 + self.imag**2) 910 911 def __pos__(self): 912 return self 913 914 def __neg__(self): 915 return -1 * self 916 917 def __eq__(self, other): 918 return self.real == other.real and self.imag == other.imag 919 920 def __str__(self): 921 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 922 923 def __repr__(self): 924 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
View Source
Executes the gamma_method for the real and the imaginary part.
View Source
Checks whether both real and imaginary part are zero within machine precision.
View Source
1032def derived_observable(func, data, array_mode=False, **kwargs): 1033 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1034 1035 Parameters 1036 ---------- 1037 func : object 1038 arbitrary function of the form func(data, **kwargs). For the 1039 automatic differentiation to work, all numpy functions have to have 1040 the autograd wrapper (use 'import autograd.numpy as anp'). 1041 data : list 1042 list of Obs, e.g. [obs1, obs2, obs3]. 1043 num_grad : bool 1044 if True, numerical derivatives are used instead of autograd 1045 (default False). To control the numerical differentiation the 1046 kwargs of numdifftools.step_generators.MaxStepGenerator 1047 can be used. 1048 man_grad : list 1049 manually supply a list or an array which contains the jacobian 1050 of func. Use cautiously, supplying the wrong derivative will 1051 not be intercepted. 1052 1053 Notes 1054 ----- 1055 For simple mathematical operations it can be practical to use anonymous 1056 functions. For the ratio of two observables one can e.g. use 1057 1058 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1059 """ 1060 1061 data = np.asarray(data) 1062 raveled_data = data.ravel() 1063 1064 # Workaround for matrix operations containing non Obs data 1065 if not all(isinstance(x, Obs) for x in raveled_data): 1066 for i in range(len(raveled_data)): 1067 if isinstance(raveled_data[i], (int, float)): 1068 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1069 1070 allcov = {} 1071 for o in raveled_data: 1072 for name in o.cov_names: 1073 if name in allcov: 1074 if not np.allclose(allcov[name], o.covobs[name].cov): 1075 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1076 else: 1077 allcov[name] = o.covobs[name].cov 1078 1079 n_obs = len(raveled_data) 1080 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1081 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1082 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1083 1084 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1085 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1086 1087 if data.ndim == 1: 1088 values = np.array([o.value for o in data]) 1089 else: 1090 values = np.vectorize(lambda x: x.value)(data) 1091 1092 new_values = func(values, **kwargs) 1093 1094 multi = int(isinstance(new_values, np.ndarray)) 1095 1096 new_r_values = {} 1097 new_idl_d = {} 1098 for name in new_sample_names: 1099 idl = [] 1100 tmp_values = np.zeros(n_obs) 1101 for i, item in enumerate(raveled_data): 1102 tmp_values[i] = item.r_values.get(name, item.value) 1103 tmp_idl = item.idl.get(name) 1104 if tmp_idl is not None: 1105 idl.append(tmp_idl) 1106 if multi > 0: 1107 tmp_values = np.array(tmp_values).reshape(data.shape) 1108 new_r_values[name] = func(tmp_values, **kwargs) 1109 new_idl_d[name] = _merge_idx(idl) 1110 if not is_merged[name]: 1111 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1112 1113 if 'man_grad' in kwargs: 1114 deriv = np.asarray(kwargs.get('man_grad')) 1115 if new_values.shape + data.shape != deriv.shape: 1116 raise Exception('Manual derivative does not have correct shape.') 1117 elif kwargs.get('num_grad') is True: 1118 if multi > 0: 1119 raise Exception('Multi mode currently not supported for numerical derivative') 1120 options = { 1121 'base_step': 0.1, 1122 'step_ratio': 2.5} 1123 for key in options.keys(): 1124 kwarg = kwargs.get(key) 1125 if kwarg is not None: 1126 options[key] = kwarg 1127 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1128 if tmp_df.size == 1: 1129 deriv = np.array([tmp_df.real]) 1130 else: 1131 deriv = tmp_df.real 1132 else: 1133 deriv = jacobian(func)(values, **kwargs) 1134 1135 final_result = np.zeros(new_values.shape, dtype=object) 1136 1137 if array_mode is True: 1138 1139 class _Zero_grad(): 1140 def __init__(self, N): 1141 self.grad = np.zeros((N, 1)) 1142 1143 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1144 d_extracted = {} 1145 g_extracted = {} 1146 for name in new_sample_names: 1147 d_extracted[name] = [] 1148 ens_length = len(new_idl_d[name]) 1149 for i_dat, dat in enumerate(data): 1150 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1151 for name in new_cov_names: 1152 g_extracted[name] = [] 1153 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1154 for i_dat, dat in enumerate(data): 1155 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1156 1157 for i_val, new_val in np.ndenumerate(new_values): 1158 new_deltas = {} 1159 new_grad = {} 1160 if array_mode is True: 1161 for name in new_sample_names: 1162 ens_length = d_extracted[name][0].shape[-1] 1163 new_deltas[name] = np.zeros(ens_length) 1164 for i_dat, dat in enumerate(d_extracted[name]): 1165 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1166 for name in new_cov_names: 1167 new_grad[name] = 0 1168 for i_dat, dat in enumerate(g_extracted[name]): 1169 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1170 else: 1171 for j_obs, obs in np.ndenumerate(data): 1172 for name in obs.names: 1173 if name in obs.cov_names: 1174 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1175 else: 1176 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1177 1178 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1179 1180 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1181 raise Exception('The same name has been used for deltas and covobs!') 1182 new_samples = [] 1183 new_means = [] 1184 new_idl = [] 1185 new_names_obs = [] 1186 for name in new_names: 1187 if name not in new_covobs: 1188 if is_merged[name]: 1189 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1190 else: 1191 filtered_deltas = new_deltas[name] 1192 filtered_idl_d = new_idl_d[name] 1193 1194 new_samples.append(filtered_deltas) 1195 new_idl.append(filtered_idl_d) 1196 new_means.append(new_r_values[name][i_val]) 1197 new_names_obs.append(name) 1198 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1199 for name in new_covobs: 1200 final_result[i_val].names.append(name) 1201 final_result[i_val]._covobs = new_covobs 1202 final_result[i_val]._value = new_val 1203 final_result[i_val].is_merged = is_merged 1204 final_result[i_val].reweighted = reweighted 1205 1206 if multi == 0: 1207 final_result = final_result.item() 1208 1209 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
View Source
1249def reweight(weight, obs, **kwargs): 1250 """Reweight a list of observables. 1251 1252 Parameters 1253 ---------- 1254 weight : Obs 1255 Reweighting factor. An Observable that has to be defined on a superset of the 1256 configurations in obs[i].idl for all i. 1257 obs : list 1258 list of Obs, e.g. [obs1, obs2, obs3]. 1259 all_configs : bool 1260 if True, the reweighted observables are normalized by the average of 1261 the reweighting factor on all configurations in weight.idl and not 1262 on the configurations in obs[i].idl. 1263 """ 1264 result = [] 1265 for i in range(len(obs)): 1266 if len(obs[i].cov_names): 1267 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1268 if not set(obs[i].names).issubset(weight.names): 1269 raise Exception('Error: Ensembles do not fit') 1270 for name in obs[i].names: 1271 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1272 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1273 new_samples = [] 1274 w_deltas = {} 1275 for name in sorted(obs[i].names): 1276 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1277 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1278 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1279 1280 if kwargs.get('all_configs'): 1281 new_weight = weight 1282 else: 1283 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1284 1285 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1286 result[-1].reweighted = True 1287 result[-1].is_merged = obs[i].is_merged 1288 1289 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
View Source
1292def correlate(obs_a, obs_b): 1293 """Correlate two observables. 1294 1295 Parameters 1296 ---------- 1297 obs_a : Obs 1298 First observable 1299 obs_b : Obs 1300 Second observable 1301 1302 Notes 1303 ----- 1304 Keep in mind to only correlate primary observables which have not been reweighted 1305 yet. The reweighting has to be applied after correlating the observables. 1306 Currently only works if ensembles are identical (this is not strictly necessary). 1307 """ 1308 1309 if sorted(obs_a.names) != sorted(obs_b.names): 1310 raise Exception('Ensembles do not fit') 1311 if len(obs_a.cov_names) or len(obs_b.cov_names): 1312 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1313 for name in obs_a.names: 1314 if obs_a.shape[name] != obs_b.shape[name]: 1315 raise Exception('Shapes of ensemble', name, 'do not fit') 1316 if obs_a.idl[name] != obs_b.idl[name]: 1317 raise Exception('idl of ensemble', name, 'do not fit') 1318 1319 if obs_a.reweighted is True: 1320 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1321 if obs_b.reweighted is True: 1322 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1323 1324 new_samples = [] 1325 new_idl = [] 1326 for name in sorted(obs_a.names): 1327 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1328 new_idl.append(obs_a.idl[name]) 1329 1330 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1331 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1332 o.reweighted = obs_a.reweighted or obs_b.reweighted 1333 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
View Source
1336def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1337 r'''Calculates the covariance matrix of a set of observables. 1338 1339 The gamma method has to be applied first to all observables. 1340 1341 Parameters 1342 ---------- 1343 obs : list or numpy.ndarray 1344 List or one dimensional array of Obs 1345 visualize : bool 1346 If True plots the corresponding normalized correlation matrix (default False). 1347 correlation : bool 1348 If True the correlation instead of the covariance is returned (default False). 1349 smooth : None or int 1350 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1351 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1352 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1353 small ones. 1354 1355 Notes 1356 ----- 1357 The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1358 $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1359 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1360 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1361 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1362 ''' 1363 1364 length = len(obs) 1365 1366 max_samples = np.max([o.N for o in obs]) 1367 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1368 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1369 1370 cov = np.zeros((length, length)) 1371 for i in range(length): 1372 for j in range(i, length): 1373 cov[i, j] = _covariance_element(obs[i], obs[j]) 1374 cov = cov + cov.T - np.diag(np.diag(cov)) 1375 1376 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1377 1378 if isinstance(smooth, int): 1379 corr = _smooth_eigenvalues(corr, smooth) 1380 1381 errors = [o.dvalue for o in obs] 1382 cov = np.diag(errors) @ corr @ np.diag(errors) 1383 1384 eigenvalues = np.linalg.eigh(cov)[0] 1385 if not np.all(eigenvalues >= 0): 1386 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1387 1388 if visualize: 1389 plt.matshow(corr, vmin=-1, vmax=1) 1390 plt.set_cmap('RdBu') 1391 plt.colorbar() 1392 plt.draw() 1393 1394 if correlation is True: 1395 return corr 1396 else: 1397 return cov
Calculates the covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation instead of the covariance is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
View Source
1476def import_jackknife(jacks, name, idl=None): 1477 """Imports jackknife samples and returns an Obs 1478 1479 Parameters 1480 ---------- 1481 jacks : numpy.ndarray 1482 numpy array containing the mean value as zeroth entry and 1483 the N jackknife samples as first to Nth entry. 1484 name : str 1485 name of the ensemble the samples are defined on. 1486 """ 1487 length = len(jacks) - 1 1488 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1489 samples = jacks[1:] @ prj 1490 mean = np.mean(samples) 1491 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1492 new_obs._value = jacks[0] 1493 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
View Source
1496def merge_obs(list_of_obs): 1497 """Combine all observables in list_of_obs into one new observable 1498 1499 Parameters 1500 ---------- 1501 list_of_obs : list 1502 list of the Obs object to be combined 1503 1504 Notes 1505 ----- 1506 It is not possible to combine obs which are based on the same replicum 1507 """ 1508 replist = [item for obs in list_of_obs for item in obs.names] 1509 if (len(replist) == len(set(replist))) is False: 1510 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1511 if any([len(o.cov_names) for o in list_of_obs]): 1512 raise Exception('Not possible to merge data that contains covobs!') 1513 new_dict = {} 1514 idl_dict = {} 1515 for o in list_of_obs: 1516 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1517 for key in set(o.deltas) | set(o.r_values)}) 1518 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1519 1520 names = sorted(new_dict.keys()) 1521 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1522 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1523 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1524 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
View Source
1527def cov_Obs(means, cov, name, grad=None): 1528 """Create an Obs based on mean(s) and a covariance matrix 1529 1530 Parameters 1531 ---------- 1532 mean : list of floats or float 1533 N mean value(s) of the new Obs 1534 cov : list or array 1535 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1536 name : str 1537 identifier for the covariance matrix 1538 grad : list or array 1539 Gradient of the Covobs wrt. the means belonging to cov. 1540 """ 1541 1542 def covobs_to_obs(co): 1543 """Make an Obs out of a Covobs 1544 1545 Parameters 1546 ---------- 1547 co : Covobs 1548 Covobs to be embedded into the Obs 1549 """ 1550 o = Obs([], [], means=[]) 1551 o._value = co.value 1552 o.names.append(co.name) 1553 o._covobs[co.name] = co 1554 o._dvalue = np.sqrt(co.errsq()) 1555 return o 1556 1557 ol = [] 1558 if isinstance(means, (float, int)): 1559 means = [means] 1560 1561 for i in range(len(means)): 1562 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1563 if ol[0].covobs[name].N != len(means): 1564 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1565 if len(ol) == 1: 1566 return ol[0] 1567 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.