pyerrors.obs
1import warnings 2import pickle 3import numpy as np 4import autograd.numpy as anp # Thinly-wrapped numpy 5from autograd import jacobian 6import matplotlib.pyplot as plt 7from scipy.stats import skew, skewtest, kurtosis, kurtosistest 8import numdifftools as nd 9from itertools import groupby 10from .covobs import Covobs 11 12 13class Obs: 14 """Class for a general observable. 15 16 Instances of Obs are the basic objects of a pyerrors error analysis. 17 They are initialized with a list which contains arrays of samples for 18 different ensembles/replica and another list of same length which contains 19 the names of the ensembles/replica. Mathematical operations can be 20 performed on instances. The result is another instance of Obs. The error of 21 an instance can be computed with the gamma_method. Also contains additional 22 methods for output and visualization of the error calculation. 23 24 Attributes 25 ---------- 26 S_global : float 27 Standard value for S (default 2.0) 28 S_dict : dict 29 Dictionary for S values. If an entry for a given ensemble 30 exists this overwrites the standard value for that ensemble. 31 tau_exp_global : float 32 Standard value for tau_exp (default 0.0) 33 tau_exp_dict : dict 34 Dictionary for tau_exp values. If an entry for a given ensemble exists 35 this overwrites the standard value for that ensemble. 36 N_sigma_global : float 37 Standard value for N_sigma (default 1.0) 38 N_sigma_dict : dict 39 Dictionary for N_sigma values. If an entry for a given ensemble exists 40 this overwrites the standard value for that ensemble. 41 """ 42 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 43 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 44 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 45 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 46 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 47 48 S_global = 2.0 49 S_dict = {} 50 tau_exp_global = 0.0 51 tau_exp_dict = {} 52 N_sigma_global = 1.0 53 N_sigma_dict = {} 54 filter_eps = 1e-10 55 56 def __init__(self, samples, names, idl=None, **kwargs): 57 """ Initialize Obs object. 58 59 Parameters 60 ---------- 61 samples : list 62 list of numpy arrays containing the Monte Carlo samples 63 names : list 64 list of strings labeling the individual samples 65 idl : list, optional 66 list of ranges or lists on which the samples are defined 67 """ 68 69 if kwargs.get("means") is None and len(samples): 70 if len(samples) != len(names): 71 raise Exception('Length of samples and names incompatible.') 72 if idl is not None: 73 if len(idl) != len(names): 74 raise Exception('Length of idl incompatible with samples and names.') 75 name_length = len(names) 76 if name_length > 1: 77 if name_length != len(set(names)): 78 raise Exception('names are not unique.') 79 if not all(isinstance(x, str) for x in names): 80 raise TypeError('All names have to be strings.') 81 else: 82 if not isinstance(names[0], str): 83 raise TypeError('All names have to be strings.') 84 if min(len(x) for x in samples) <= 4: 85 raise Exception('Samples have to have at least 5 entries.') 86 87 self.names = sorted(names) 88 self.shape = {} 89 self.r_values = {} 90 self.deltas = {} 91 self._covobs = {} 92 93 self._value = 0 94 self.N = 0 95 self.is_merged = {} 96 self.idl = {} 97 if idl is not None: 98 for name, idx in sorted(zip(names, idl)): 99 if isinstance(idx, range): 100 self.idl[name] = idx 101 elif isinstance(idx, (list, np.ndarray)): 102 dc = np.unique(np.diff(idx)) 103 if np.any(dc < 0): 104 raise Exception("Unsorted idx for idl[%s]" % (name)) 105 if len(dc) == 1: 106 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 107 else: 108 self.idl[name] = list(idx) 109 else: 110 raise Exception('incompatible type for idl[%s].' % (name)) 111 else: 112 for name, sample in sorted(zip(names, samples)): 113 self.idl[name] = range(1, len(sample) + 1) 114 115 if kwargs.get("means") is not None: 116 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 117 self.shape[name] = len(self.idl[name]) 118 self.N += self.shape[name] 119 self.r_values[name] = mean 120 self.deltas[name] = sample 121 else: 122 for name, sample in sorted(zip(names, samples)): 123 self.shape[name] = len(self.idl[name]) 124 self.N += self.shape[name] 125 if len(sample) != self.shape[name]: 126 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 127 self.r_values[name] = np.mean(sample) 128 self.deltas[name] = sample - self.r_values[name] 129 self._value += self.shape[name] * self.r_values[name] 130 self._value /= self.N 131 132 self._dvalue = 0.0 133 self.ddvalue = 0.0 134 self.reweighted = False 135 136 self.tag = None 137 138 @property 139 def value(self): 140 return self._value 141 142 @property 143 def dvalue(self): 144 return self._dvalue 145 146 @property 147 def e_names(self): 148 return sorted(set([o.split('|')[0] for o in self.names])) 149 150 @property 151 def cov_names(self): 152 return sorted(set([o for o in self.covobs.keys()])) 153 154 @property 155 def mc_names(self): 156 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 157 158 @property 159 def e_content(self): 160 res = {} 161 for e, e_name in enumerate(self.e_names): 162 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 163 if e_name in self.names: 164 res[e_name].append(e_name) 165 return res 166 167 @property 168 def covobs(self): 169 return self._covobs 170 171 def gamma_method(self, **kwargs): 172 """Estimate the error and related properties of the Obs. 173 174 Parameters 175 ---------- 176 S : float 177 specifies a custom value for the parameter S (default 2.0). 178 If set to 0 it is assumed that the data exhibits no 179 autocorrelation. In this case the error estimates coincides 180 with the sample standard error. 181 tau_exp : float 182 positive value triggers the critical slowing down analysis 183 (default 0.0). 184 N_sigma : float 185 number of standard deviations from zero until the tail is 186 attached to the autocorrelation function (default 1). 187 fft : bool 188 determines whether the fft algorithm is used for the computation 189 of the autocorrelation function (default True) 190 """ 191 192 e_content = self.e_content 193 self.e_dvalue = {} 194 self.e_ddvalue = {} 195 self.e_tauint = {} 196 self.e_dtauint = {} 197 self.e_windowsize = {} 198 self.e_n_tauint = {} 199 self.e_n_dtauint = {} 200 e_gamma = {} 201 self.e_rho = {} 202 self.e_drho = {} 203 self._dvalue = 0 204 self.ddvalue = 0 205 206 self.S = {} 207 self.tau_exp = {} 208 self.N_sigma = {} 209 210 if kwargs.get('fft') is False: 211 fft = False 212 else: 213 fft = True 214 215 def _parse_kwarg(kwarg_name): 216 if kwarg_name in kwargs: 217 tmp = kwargs.get(kwarg_name) 218 if isinstance(tmp, (int, float)): 219 if tmp < 0: 220 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 221 for e, e_name in enumerate(self.e_names): 222 getattr(self, kwarg_name)[e_name] = tmp 223 else: 224 raise TypeError(kwarg_name + ' is not in proper format.') 225 else: 226 for e, e_name in enumerate(self.e_names): 227 if e_name in getattr(Obs, kwarg_name + '_dict'): 228 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 229 else: 230 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 231 232 _parse_kwarg('S') 233 _parse_kwarg('tau_exp') 234 _parse_kwarg('N_sigma') 235 236 for e, e_name in enumerate(self.mc_names): 237 r_length = [] 238 for r_name in e_content[e_name]: 239 if isinstance(self.idl[r_name], range): 240 r_length.append(len(self.idl[r_name])) 241 else: 242 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 243 244 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 245 w_max = max(r_length) // 2 246 e_gamma[e_name] = np.zeros(w_max) 247 self.e_rho[e_name] = np.zeros(w_max) 248 self.e_drho[e_name] = np.zeros(w_max) 249 250 for r_name in e_content[e_name]: 251 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 252 253 gamma_div = np.zeros(w_max) 254 for r_name in e_content[e_name]: 255 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 256 gamma_div[gamma_div < 1] = 1.0 257 e_gamma[e_name] /= gamma_div[:w_max] 258 259 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 260 self.e_tauint[e_name] = 0.5 261 self.e_dtauint[e_name] = 0.0 262 self.e_dvalue[e_name] = 0.0 263 self.e_ddvalue[e_name] = 0.0 264 self.e_windowsize[e_name] = 0 265 continue 266 267 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 268 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 269 # Make sure no entry of tauint is smaller than 0.5 270 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 271 # hep-lat/0306017 eq. (42) 272 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 273 self.e_n_dtauint[e_name][0] = 0.0 274 275 def _compute_drho(i): 276 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 277 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 278 279 _compute_drho(1) 280 if self.tau_exp[e_name] > 0: 281 texp = self.tau_exp[e_name] 282 # Critical slowing down analysis 283 if w_max // 2 <= 1: 284 raise Exception("Need at least 8 samples for tau_exp error analysis") 285 for n in range(1, w_max // 2): 286 _compute_drho(n + 1) 287 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 288 # Bias correction hep-lat/0306017 eq. (49) included 289 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 290 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 291 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 292 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 293 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 294 self.e_windowsize[e_name] = n 295 break 296 else: 297 if self.S[e_name] == 0.0: 298 self.e_tauint[e_name] = 0.5 299 self.e_dtauint[e_name] = 0.0 300 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 301 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 302 self.e_windowsize[e_name] = 0 303 else: 304 # Standard automatic windowing procedure 305 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 306 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 307 for n in range(1, w_max): 308 if n < w_max // 2 - 2: 309 _compute_drho(n + 1) 310 if g_w[n - 1] < 0 or n >= w_max - 1: 311 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 312 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 313 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 314 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 315 self.e_windowsize[e_name] = n 316 break 317 318 self._dvalue += self.e_dvalue[e_name] ** 2 319 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 320 321 for e_name in self.cov_names: 322 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 323 self.e_ddvalue[e_name] = 0 324 self._dvalue += self.e_dvalue[e_name]**2 325 326 self._dvalue = np.sqrt(self._dvalue) 327 if self._dvalue == 0.0: 328 self.ddvalue = 0.0 329 else: 330 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 331 return 332 333 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 334 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 335 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 336 337 Parameters 338 ---------- 339 deltas : list 340 List of fluctuations 341 idx : list 342 List or range of configurations on which the deltas are defined. 343 shape : int 344 Number of configurations in idx. 345 w_max : int 346 Upper bound for the summation window. 347 fft : bool 348 determines whether the fft algorithm is used for the computation 349 of the autocorrelation function. 350 """ 351 gamma = np.zeros(w_max) 352 deltas = _expand_deltas(deltas, idx, shape) 353 new_shape = len(deltas) 354 if fft: 355 max_gamma = min(new_shape, w_max) 356 # The padding for the fft has to be even 357 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 358 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 359 else: 360 for n in range(w_max): 361 if new_shape - n >= 0: 362 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 363 364 return gamma 365 366 def details(self, ens_content=True): 367 """Output detailed properties of the Obs. 368 369 Parameters 370 ---------- 371 ens_content : bool 372 print details about the ensembles and replica if true. 373 """ 374 if self.tag is not None: 375 print("Description:", self.tag) 376 if not hasattr(self, 'e_dvalue'): 377 print('Result\t %3.8e' % (self.value)) 378 else: 379 if self.value == 0.0: 380 percentage = np.nan 381 else: 382 percentage = np.abs(self._dvalue / self.value) * 100 383 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 384 if len(self.e_names) > 1: 385 print(' Ensemble errors:') 386 for e_name in self.mc_names: 387 if len(self.e_names) > 1: 388 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 389 if self.tau_exp[e_name] > 0: 390 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 391 else: 392 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 393 for e_name in self.cov_names: 394 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 395 if ens_content is True: 396 if len(self.e_names) == 1: 397 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 398 else: 399 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 400 my_string_list = [] 401 for key, value in sorted(self.e_content.items()): 402 if key not in self.covobs: 403 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 404 if len(value) == 1: 405 my_string += f': {self.shape[value[0]]} configurations' 406 if isinstance(self.idl[value[0]], range): 407 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 408 else: 409 my_string += ' (irregular range)' 410 else: 411 sublist = [] 412 for v in value: 413 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 414 my_substring += f': {self.shape[v]} configurations' 415 if isinstance(self.idl[v], range): 416 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 417 else: 418 my_substring += ' (irregular range)' 419 sublist.append(my_substring) 420 421 my_string += '\n' + '\n'.join(sublist) 422 else: 423 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 424 my_string_list.append(my_string) 425 print('\n'.join(my_string_list)) 426 427 def is_zero_within_error(self, sigma=1): 428 """Checks whether the observable is zero within 'sigma' standard errors. 429 430 Parameters 431 ---------- 432 sigma : int 433 Number of standard errors used for the check. 434 435 Works only properly when the gamma method was run. 436 """ 437 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 438 439 def is_zero(self, atol=1e-10): 440 """Checks whether the observable is zero within a given tolerance. 441 442 Parameters 443 ---------- 444 atol : float 445 Absolute tolerance (for details see numpy documentation). 446 """ 447 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 448 449 def plot_tauint(self, save=None): 450 """Plot integrated autocorrelation time for each ensemble. 451 452 Parameters 453 ---------- 454 save : str 455 saves the figure to a file named 'save' if. 456 """ 457 if not hasattr(self, 'e_dvalue'): 458 raise Exception('Run the gamma method first.') 459 460 for e, e_name in enumerate(self.mc_names): 461 fig = plt.figure() 462 plt.xlabel(r'$W$') 463 plt.ylabel(r'$\tau_\mathrm{int}$') 464 length = int(len(self.e_n_tauint[e_name])) 465 if self.tau_exp[e_name] > 0: 466 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 467 x_help = np.arange(2 * self.tau_exp[e_name]) 468 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 469 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 470 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 471 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 472 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 473 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 474 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 475 else: 476 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 477 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 478 479 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 480 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 481 plt.legend() 482 plt.xlim(-0.5, xmax) 483 ylim = plt.ylim() 484 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 485 plt.draw() 486 if save: 487 fig.savefig(save + "_" + str(e)) 488 489 def plot_rho(self, save=None): 490 """Plot normalized autocorrelation function time for each ensemble. 491 492 Parameters 493 ---------- 494 save : str 495 saves the figure to a file named 'save' if. 496 """ 497 if not hasattr(self, 'e_dvalue'): 498 raise Exception('Run the gamma method first.') 499 for e, e_name in enumerate(self.mc_names): 500 fig = plt.figure() 501 plt.xlabel('W') 502 plt.ylabel('rho') 503 length = int(len(self.e_drho[e_name])) 504 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 505 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 506 if self.tau_exp[e_name] > 0: 507 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 508 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 509 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 510 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 511 else: 512 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 513 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 514 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 515 plt.xlim(-0.5, xmax) 516 plt.draw() 517 if save: 518 fig.savefig(save + "_" + str(e)) 519 520 def plot_rep_dist(self): 521 """Plot replica distribution for each ensemble with more than one replicum.""" 522 if not hasattr(self, 'e_dvalue'): 523 raise Exception('Run the gamma method first.') 524 for e, e_name in enumerate(self.mc_names): 525 if len(self.e_content[e_name]) == 1: 526 print('No replica distribution for a single replicum (', e_name, ')') 527 continue 528 r_length = [] 529 sub_r_mean = 0 530 for r, r_name in enumerate(self.e_content[e_name]): 531 r_length.append(len(self.deltas[r_name])) 532 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 533 e_N = np.sum(r_length) 534 sub_r_mean /= e_N 535 arr = np.zeros(len(self.e_content[e_name])) 536 for r, r_name in enumerate(self.e_content[e_name]): 537 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 538 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 539 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 540 plt.draw() 541 542 def plot_history(self, expand=True): 543 """Plot derived Monte Carlo history for each ensemble 544 545 Parameters 546 ---------- 547 expand : bool 548 show expanded history for irregular Monte Carlo chains (default: True). 549 """ 550 for e, e_name in enumerate(self.mc_names): 551 plt.figure() 552 r_length = [] 553 tmp = [] 554 tmp_expanded = [] 555 for r, r_name in enumerate(self.e_content[e_name]): 556 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 557 if expand: 558 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 559 r_length.append(len(tmp_expanded[-1])) 560 else: 561 r_length.append(len(tmp[-1])) 562 e_N = np.sum(r_length) 563 x = np.arange(e_N) 564 y_test = np.concatenate(tmp, axis=0) 565 if expand: 566 y = np.concatenate(tmp_expanded, axis=0) 567 else: 568 y = y_test 569 plt.errorbar(x, y, fmt='.', markersize=3) 570 plt.xlim(-0.5, e_N - 0.5) 571 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 572 plt.draw() 573 574 def plot_piechart(self, save=None): 575 """Plot piechart which shows the fractional contribution of each 576 ensemble to the error and returns a dictionary containing the fractions. 577 578 Parameters 579 ---------- 580 save : str 581 saves the figure to a file named 'save' if. 582 """ 583 if not hasattr(self, 'e_dvalue'): 584 raise Exception('Run the gamma method first.') 585 if np.isclose(0.0, self._dvalue, atol=1e-15): 586 raise Exception('Error is 0.0') 587 labels = self.e_names 588 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 589 fig1, ax1 = plt.subplots() 590 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 591 ax1.axis('equal') 592 plt.draw() 593 if save: 594 fig1.savefig(save) 595 596 return dict(zip(self.e_names, sizes)) 597 598 def dump(self, filename, datatype="json.gz", description="", **kwargs): 599 """Dump the Obs to a file 'name' of chosen format. 600 601 Parameters 602 ---------- 603 filename : str 604 name of the file to be saved. 605 datatype : str 606 Format of the exported file. Supported formats include 607 "json.gz" and "pickle" 608 description : str 609 Description for output file, only relevant for json.gz format. 610 path : str 611 specifies a custom path for the file (default '.') 612 """ 613 if 'path' in kwargs: 614 file_name = kwargs.get('path') + '/' + filename 615 else: 616 file_name = filename 617 618 if datatype == "json.gz": 619 from .input.json import dump_to_json 620 dump_to_json([self], file_name, description=description) 621 elif datatype == "pickle": 622 with open(file_name + '.p', 'wb') as fb: 623 pickle.dump(self, fb) 624 else: 625 raise Exception("Unknown datatype " + str(datatype)) 626 627 def export_jackknife(self): 628 """Export jackknife samples from the Obs 629 630 Returns 631 ------- 632 numpy.ndarray 633 Returns a numpy array of length N + 1 where N is the number of samples 634 for the given ensemble and replicum. The zeroth entry of the array contains 635 the mean value of the Obs, entries 1 to N contain the N jackknife samples 636 derived from the Obs. The current implementation only works for observables 637 defined on exactly one ensemble and replicum. The derived jackknife samples 638 should agree with samples from a full jackknife analysis up to O(1/N). 639 """ 640 641 if len(self.names) != 1: 642 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 643 644 name = self.names[0] 645 full_data = self.deltas[name] + self.r_values[name] 646 n = full_data.size 647 mean = self.value 648 tmp_jacks = np.zeros(n + 1) 649 tmp_jacks[0] = mean 650 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 651 return tmp_jacks 652 653 def __float__(self): 654 return float(self.value) 655 656 def __repr__(self): 657 return 'Obs[' + str(self) + ']' 658 659 def __str__(self): 660 if self._dvalue == 0.0: 661 return str(self.value) 662 fexp = np.floor(np.log10(self._dvalue)) 663 if fexp < 0.0: 664 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 665 elif fexp == 0.0: 666 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 667 else: 668 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 669 670 # Overload comparisons 671 def __lt__(self, other): 672 return self.value < other 673 674 def __le__(self, other): 675 return self.value <= other 676 677 def __gt__(self, other): 678 return self.value > other 679 680 def __ge__(self, other): 681 return self.value >= other 682 683 def __eq__(self, other): 684 return (self - other).is_zero() 685 686 def __ne__(self, other): 687 return not (self - other).is_zero() 688 689 # Overload math operations 690 def __add__(self, y): 691 if isinstance(y, Obs): 692 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 693 else: 694 if isinstance(y, np.ndarray): 695 return np.array([self + o for o in y]) 696 elif y.__class__.__name__ in ['Corr', 'CObs']: 697 return NotImplemented 698 else: 699 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 700 701 def __radd__(self, y): 702 return self + y 703 704 def __mul__(self, y): 705 if isinstance(y, Obs): 706 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 707 else: 708 if isinstance(y, np.ndarray): 709 return np.array([self * o for o in y]) 710 elif isinstance(y, complex): 711 return CObs(self * y.real, self * y.imag) 712 elif y.__class__.__name__ in ['Corr', 'CObs']: 713 return NotImplemented 714 else: 715 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 716 717 def __rmul__(self, y): 718 return self * y 719 720 def __sub__(self, y): 721 if isinstance(y, Obs): 722 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 723 else: 724 if isinstance(y, np.ndarray): 725 return np.array([self - o for o in y]) 726 elif y.__class__.__name__ in ['Corr', 'CObs']: 727 return NotImplemented 728 else: 729 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 730 731 def __rsub__(self, y): 732 return -1 * (self - y) 733 734 def __pos__(self): 735 return self 736 737 def __neg__(self): 738 return -1 * self 739 740 def __truediv__(self, y): 741 if isinstance(y, Obs): 742 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 743 else: 744 if isinstance(y, np.ndarray): 745 return np.array([self / o for o in y]) 746 elif y.__class__.__name__ in ['Corr', 'CObs']: 747 return NotImplemented 748 else: 749 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 750 751 def __rtruediv__(self, y): 752 if isinstance(y, Obs): 753 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 754 else: 755 if isinstance(y, np.ndarray): 756 return np.array([o / self for o in y]) 757 elif y.__class__.__name__ in ['Corr', 'CObs']: 758 return NotImplemented 759 else: 760 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 761 762 def __pow__(self, y): 763 if isinstance(y, Obs): 764 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 765 else: 766 return derived_observable(lambda x: x[0] ** y, [self]) 767 768 def __rpow__(self, y): 769 if isinstance(y, Obs): 770 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 771 else: 772 return derived_observable(lambda x: y ** x[0], [self]) 773 774 def __abs__(self): 775 return derived_observable(lambda x: anp.abs(x[0]), [self]) 776 777 # Overload numpy functions 778 def sqrt(self): 779 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 780 781 def log(self): 782 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 783 784 def exp(self): 785 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 786 787 def sin(self): 788 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 789 790 def cos(self): 791 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 792 793 def tan(self): 794 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 795 796 def arcsin(self): 797 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 798 799 def arccos(self): 800 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 801 802 def arctan(self): 803 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 804 805 def sinh(self): 806 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 807 808 def cosh(self): 809 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 810 811 def tanh(self): 812 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 813 814 def arcsinh(self): 815 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 816 817 def arccosh(self): 818 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 819 820 def arctanh(self): 821 return derived_observable(lambda x: anp.arctanh(x[0]), [self]) 822 823 824class CObs: 825 """Class for a complex valued observable.""" 826 __slots__ = ['_real', '_imag', 'tag'] 827 828 def __init__(self, real, imag=0.0): 829 self._real = real 830 self._imag = imag 831 self.tag = None 832 833 @property 834 def real(self): 835 return self._real 836 837 @property 838 def imag(self): 839 return self._imag 840 841 def gamma_method(self, **kwargs): 842 """Executes the gamma_method for the real and the imaginary part.""" 843 if isinstance(self.real, Obs): 844 self.real.gamma_method(**kwargs) 845 if isinstance(self.imag, Obs): 846 self.imag.gamma_method(**kwargs) 847 848 def is_zero(self): 849 """Checks whether both real and imaginary part are zero within machine precision.""" 850 return self.real == 0.0 and self.imag == 0.0 851 852 def conjugate(self): 853 return CObs(self.real, -self.imag) 854 855 def __add__(self, other): 856 if isinstance(other, np.ndarray): 857 return other + self 858 elif hasattr(other, 'real') and hasattr(other, 'imag'): 859 return CObs(self.real + other.real, 860 self.imag + other.imag) 861 else: 862 return CObs(self.real + other, self.imag) 863 864 def __radd__(self, y): 865 return self + y 866 867 def __sub__(self, other): 868 if isinstance(other, np.ndarray): 869 return -1 * (other - self) 870 elif hasattr(other, 'real') and hasattr(other, 'imag'): 871 return CObs(self.real - other.real, self.imag - other.imag) 872 else: 873 return CObs(self.real - other, self.imag) 874 875 def __rsub__(self, other): 876 return -1 * (self - other) 877 878 def __mul__(self, other): 879 if isinstance(other, np.ndarray): 880 return other * self 881 elif hasattr(other, 'real') and hasattr(other, 'imag'): 882 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 883 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 884 [self.real, other.real, self.imag, other.imag], 885 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 886 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 887 [self.real, other.real, self.imag, other.imag], 888 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 889 elif getattr(other, 'imag', 0) != 0: 890 return CObs(self.real * other.real - self.imag * other.imag, 891 self.imag * other.real + self.real * other.imag) 892 else: 893 return CObs(self.real * other.real, self.imag * other.real) 894 else: 895 return CObs(self.real * other, self.imag * other) 896 897 def __rmul__(self, other): 898 return self * other 899 900 def __truediv__(self, other): 901 if isinstance(other, np.ndarray): 902 return 1 / (other / self) 903 elif hasattr(other, 'real') and hasattr(other, 'imag'): 904 r = other.real ** 2 + other.imag ** 2 905 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 906 else: 907 return CObs(self.real / other, self.imag / other) 908 909 def __rtruediv__(self, other): 910 r = self.real ** 2 + self.imag ** 2 911 if hasattr(other, 'real') and hasattr(other, 'imag'): 912 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 913 else: 914 return CObs(self.real * other / r, -self.imag * other / r) 915 916 def __abs__(self): 917 return np.sqrt(self.real**2 + self.imag**2) 918 919 def __pos__(self): 920 return self 921 922 def __neg__(self): 923 return -1 * self 924 925 def __eq__(self, other): 926 return self.real == other.real and self.imag == other.imag 927 928 def __str__(self): 929 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 930 931 def __repr__(self): 932 return 'CObs[' + str(self) + ']' 933 934 935def _expand_deltas(deltas, idx, shape): 936 """Expand deltas defined on idx to a regular, contiguous range, where holes are filled by 0. 937 If idx is of type range, the deltas are not changed 938 939 Parameters 940 ---------- 941 deltas : list 942 List of fluctuations 943 idx : list 944 List or range of configs on which the deltas are defined, has to be sorted in ascending order. 945 shape : int 946 Number of configs in idx. 947 """ 948 if isinstance(idx, range): 949 return deltas 950 else: 951 ret = np.zeros(idx[-1] - idx[0] + 1) 952 for i in range(shape): 953 ret[idx[i] - idx[0]] = deltas[i] 954 return ret 955 956 957def _merge_idx(idl): 958 """Returns the union of all lists in idl as sorted list 959 960 Parameters 961 ---------- 962 idl : list 963 List of lists or ranges. 964 """ 965 966 # Use groupby to efficiently check whether all elements of idl are identical 967 try: 968 g = groupby(idl) 969 if next(g, True) and not next(g, False): 970 return idl[0] 971 except Exception: 972 pass 973 974 if np.all([type(idx) is range for idx in idl]): 975 if len(set([idx[0] for idx in idl])) == 1: 976 idstart = min([idx.start for idx in idl]) 977 idstop = max([idx.stop for idx in idl]) 978 idstep = min([idx.step for idx in idl]) 979 return range(idstart, idstop, idstep) 980 981 return sorted(set().union(*idl)) 982 983 984def _expand_deltas_for_merge(deltas, idx, shape, new_idx): 985 """Expand deltas defined on idx to the list of configs that is defined by new_idx. 986 New, empty entries are filled by 0. If idx and new_idx are of type range, the smallest 987 common divisor of the step sizes is used as new step size. 988 989 Parameters 990 ---------- 991 deltas : list 992 List of fluctuations 993 idx : list 994 List or range of configs on which the deltas are defined. 995 Has to be a subset of new_idx and has to be sorted in ascending order. 996 shape : list 997 Number of configs in idx. 998 new_idx : list 999 List of configs that defines the new range, has to be sorted in ascending order. 1000 """ 1001 1002 if type(idx) is range and type(new_idx) is range: 1003 if idx == new_idx: 1004 return deltas 1005 ret = np.zeros(new_idx[-1] - new_idx[0] + 1) 1006 for i in range(shape): 1007 ret[idx[i] - new_idx[0]] = deltas[i] 1008 return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) 1009 1010 1011def _filter_zeroes(deltas, idx, eps=Obs.filter_eps): 1012 """Filter out all configurations with vanishing fluctuation such that they do not 1013 contribute to the error estimate anymore. Returns the new deltas and 1014 idx according to the filtering. 1015 A fluctuation is considered to be vanishing, if it is smaller than eps times 1016 the mean of the absolute values of all deltas in one list. 1017 1018 Parameters 1019 ---------- 1020 deltas : list 1021 List of fluctuations 1022 idx : list 1023 List or ranges of configs on which the deltas are defined. 1024 eps : float 1025 Prefactor that enters the filter criterion. 1026 """ 1027 new_deltas = [] 1028 new_idx = [] 1029 maxd = np.mean(np.fabs(deltas)) 1030 for i in range(len(deltas)): 1031 if abs(deltas[i]) > eps * maxd: 1032 new_deltas.append(deltas[i]) 1033 new_idx.append(idx[i]) 1034 if new_idx: 1035 return np.array(new_deltas), new_idx 1036 else: 1037 return deltas, idx 1038 1039 1040def derived_observable(func, data, array_mode=False, **kwargs): 1041 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1042 1043 Parameters 1044 ---------- 1045 func : object 1046 arbitrary function of the form func(data, **kwargs). For the 1047 automatic differentiation to work, all numpy functions have to have 1048 the autograd wrapper (use 'import autograd.numpy as anp'). 1049 data : list 1050 list of Obs, e.g. [obs1, obs2, obs3]. 1051 num_grad : bool 1052 if True, numerical derivatives are used instead of autograd 1053 (default False). To control the numerical differentiation the 1054 kwargs of numdifftools.step_generators.MaxStepGenerator 1055 can be used. 1056 man_grad : list 1057 manually supply a list or an array which contains the jacobian 1058 of func. Use cautiously, supplying the wrong derivative will 1059 not be intercepted. 1060 1061 Notes 1062 ----- 1063 For simple mathematical operations it can be practical to use anonymous 1064 functions. For the ratio of two observables one can e.g. use 1065 1066 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1067 """ 1068 1069 data = np.asarray(data) 1070 raveled_data = data.ravel() 1071 1072 # Workaround for matrix operations containing non Obs data 1073 if not all(isinstance(x, Obs) for x in raveled_data): 1074 for i in range(len(raveled_data)): 1075 if isinstance(raveled_data[i], (int, float)): 1076 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1077 1078 allcov = {} 1079 for o in raveled_data: 1080 for name in o.cov_names: 1081 if name in allcov: 1082 if not np.allclose(allcov[name], o.covobs[name].cov): 1083 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1084 else: 1085 allcov[name] = o.covobs[name].cov 1086 1087 n_obs = len(raveled_data) 1088 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1089 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1090 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1091 1092 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1093 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1094 1095 if data.ndim == 1: 1096 values = np.array([o.value for o in data]) 1097 else: 1098 values = np.vectorize(lambda x: x.value)(data) 1099 1100 new_values = func(values, **kwargs) 1101 1102 multi = int(isinstance(new_values, np.ndarray)) 1103 1104 new_r_values = {} 1105 new_idl_d = {} 1106 for name in new_sample_names: 1107 idl = [] 1108 tmp_values = np.zeros(n_obs) 1109 for i, item in enumerate(raveled_data): 1110 tmp_values[i] = item.r_values.get(name, item.value) 1111 tmp_idl = item.idl.get(name) 1112 if tmp_idl is not None: 1113 idl.append(tmp_idl) 1114 if multi > 0: 1115 tmp_values = np.array(tmp_values).reshape(data.shape) 1116 new_r_values[name] = func(tmp_values, **kwargs) 1117 new_idl_d[name] = _merge_idx(idl) 1118 if not is_merged[name]: 1119 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1120 1121 if 'man_grad' in kwargs: 1122 deriv = np.asarray(kwargs.get('man_grad')) 1123 if new_values.shape + data.shape != deriv.shape: 1124 raise Exception('Manual derivative does not have correct shape.') 1125 elif kwargs.get('num_grad') is True: 1126 if multi > 0: 1127 raise Exception('Multi mode currently not supported for numerical derivative') 1128 options = { 1129 'base_step': 0.1, 1130 'step_ratio': 2.5} 1131 for key in options.keys(): 1132 kwarg = kwargs.get(key) 1133 if kwarg is not None: 1134 options[key] = kwarg 1135 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1136 if tmp_df.size == 1: 1137 deriv = np.array([tmp_df.real]) 1138 else: 1139 deriv = tmp_df.real 1140 else: 1141 deriv = jacobian(func)(values, **kwargs) 1142 1143 final_result = np.zeros(new_values.shape, dtype=object) 1144 1145 if array_mode is True: 1146 1147 class _Zero_grad(): 1148 def __init__(self, N): 1149 self.grad = np.zeros((N, 1)) 1150 1151 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1152 d_extracted = {} 1153 g_extracted = {} 1154 for name in new_sample_names: 1155 d_extracted[name] = [] 1156 ens_length = len(new_idl_d[name]) 1157 for i_dat, dat in enumerate(data): 1158 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1159 for name in new_cov_names: 1160 g_extracted[name] = [] 1161 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1162 for i_dat, dat in enumerate(data): 1163 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1164 1165 for i_val, new_val in np.ndenumerate(new_values): 1166 new_deltas = {} 1167 new_grad = {} 1168 if array_mode is True: 1169 for name in new_sample_names: 1170 ens_length = d_extracted[name][0].shape[-1] 1171 new_deltas[name] = np.zeros(ens_length) 1172 for i_dat, dat in enumerate(d_extracted[name]): 1173 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1174 for name in new_cov_names: 1175 new_grad[name] = 0 1176 for i_dat, dat in enumerate(g_extracted[name]): 1177 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1178 else: 1179 for j_obs, obs in np.ndenumerate(data): 1180 for name in obs.names: 1181 if name in obs.cov_names: 1182 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1183 else: 1184 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1185 1186 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1187 1188 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1189 raise Exception('The same name has been used for deltas and covobs!') 1190 new_samples = [] 1191 new_means = [] 1192 new_idl = [] 1193 new_names_obs = [] 1194 for name in new_names: 1195 if name not in new_covobs: 1196 if is_merged[name]: 1197 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1198 else: 1199 filtered_deltas = new_deltas[name] 1200 filtered_idl_d = new_idl_d[name] 1201 1202 new_samples.append(filtered_deltas) 1203 new_idl.append(filtered_idl_d) 1204 new_means.append(new_r_values[name][i_val]) 1205 new_names_obs.append(name) 1206 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1207 for name in new_covobs: 1208 final_result[i_val].names.append(name) 1209 final_result[i_val]._covobs = new_covobs 1210 final_result[i_val]._value = new_val 1211 final_result[i_val].is_merged = is_merged 1212 final_result[i_val].reweighted = reweighted 1213 1214 if multi == 0: 1215 final_result = final_result.item() 1216 1217 return final_result 1218 1219 1220def _reduce_deltas(deltas, idx_old, idx_new): 1221 """Extract deltas defined on idx_old on all configs of idx_new. 1222 1223 Assumes, that idx_old and idx_new are correctly defined idl, i.e., they 1224 are ordered in an ascending order. 1225 1226 Parameters 1227 ---------- 1228 deltas : list 1229 List of fluctuations 1230 idx_old : list 1231 List or range of configs on which the deltas are defined 1232 idx_new : list 1233 List of configs for which we want to extract the deltas. 1234 Has to be a subset of idx_old. 1235 """ 1236 if not len(deltas) == len(idx_old): 1237 raise Exception('Length of deltas and idx_old have to be the same: %d != %d' % (len(deltas), len(idx_old))) 1238 if type(idx_old) is range and type(idx_new) is range: 1239 if idx_old == idx_new: 1240 return deltas 1241 shape = len(idx_new) 1242 ret = np.zeros(shape) 1243 oldpos = 0 1244 for i in range(shape): 1245 pos = -1 1246 for j in range(oldpos, len(idx_old)): 1247 if idx_old[j] == idx_new[i]: 1248 pos = j 1249 break 1250 if pos < 0: 1251 raise Exception('Error in _reduce_deltas: Config %d not in idx_old' % (idx_new[i])) 1252 ret[i] = deltas[pos] 1253 oldpos = pos 1254 return np.array(ret) 1255 1256 1257def reweight(weight, obs, **kwargs): 1258 """Reweight a list of observables. 1259 1260 Parameters 1261 ---------- 1262 weight : Obs 1263 Reweighting factor. An Observable that has to be defined on a superset of the 1264 configurations in obs[i].idl for all i. 1265 obs : list 1266 list of Obs, e.g. [obs1, obs2, obs3]. 1267 all_configs : bool 1268 if True, the reweighted observables are normalized by the average of 1269 the reweighting factor on all configurations in weight.idl and not 1270 on the configurations in obs[i].idl. 1271 """ 1272 result = [] 1273 for i in range(len(obs)): 1274 if len(obs[i].cov_names): 1275 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1276 if not set(obs[i].names).issubset(weight.names): 1277 raise Exception('Error: Ensembles do not fit') 1278 for name in obs[i].names: 1279 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1280 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1281 new_samples = [] 1282 w_deltas = {} 1283 for name in sorted(obs[i].names): 1284 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1285 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1286 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1287 1288 if kwargs.get('all_configs'): 1289 new_weight = weight 1290 else: 1291 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1292 1293 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1294 result[-1].reweighted = True 1295 result[-1].is_merged = obs[i].is_merged 1296 1297 return result 1298 1299 1300def correlate(obs_a, obs_b): 1301 """Correlate two observables. 1302 1303 Parameters 1304 ---------- 1305 obs_a : Obs 1306 First observable 1307 obs_b : Obs 1308 Second observable 1309 1310 Notes 1311 ----- 1312 Keep in mind to only correlate primary observables which have not been reweighted 1313 yet. The reweighting has to be applied after correlating the observables. 1314 Currently only works if ensembles are identical (this is not strictly necessary). 1315 """ 1316 1317 if sorted(obs_a.names) != sorted(obs_b.names): 1318 raise Exception('Ensembles do not fit') 1319 if len(obs_a.cov_names) or len(obs_b.cov_names): 1320 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1321 for name in obs_a.names: 1322 if obs_a.shape[name] != obs_b.shape[name]: 1323 raise Exception('Shapes of ensemble', name, 'do not fit') 1324 if obs_a.idl[name] != obs_b.idl[name]: 1325 raise Exception('idl of ensemble', name, 'do not fit') 1326 1327 if obs_a.reweighted is True: 1328 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1329 if obs_b.reweighted is True: 1330 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1331 1332 new_samples = [] 1333 new_idl = [] 1334 for name in sorted(obs_a.names): 1335 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1336 new_idl.append(obs_a.idl[name]) 1337 1338 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1339 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1340 o.reweighted = obs_a.reweighted or obs_b.reweighted 1341 return o 1342 1343 1344def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1345 r'''Calculates the error covariance matrix of a set of observables. 1346 1347 The gamma method has to be applied first to all observables. 1348 1349 Parameters 1350 ---------- 1351 obs : list or numpy.ndarray 1352 List or one dimensional array of Obs 1353 visualize : bool 1354 If True plots the corresponding normalized correlation matrix (default False). 1355 correlation : bool 1356 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1357 smooth : None or int 1358 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1359 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1360 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1361 small ones. 1362 1363 Notes 1364 ----- 1365 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1366 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1367 in the absence of autocorrelation. 1368 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1369 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1370 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1371 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1372 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1373 ''' 1374 1375 length = len(obs) 1376 1377 max_samples = np.max([o.N for o in obs]) 1378 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1379 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1380 1381 cov = np.zeros((length, length)) 1382 for i in range(length): 1383 for j in range(i, length): 1384 cov[i, j] = _covariance_element(obs[i], obs[j]) 1385 cov = cov + cov.T - np.diag(np.diag(cov)) 1386 1387 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1388 1389 if isinstance(smooth, int): 1390 corr = _smooth_eigenvalues(corr, smooth) 1391 1392 errors = [o.dvalue for o in obs] 1393 cov = np.diag(errors) @ corr @ np.diag(errors) 1394 1395 eigenvalues = np.linalg.eigh(cov)[0] 1396 if not np.all(eigenvalues >= 0): 1397 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1398 1399 if visualize: 1400 plt.matshow(corr, vmin=-1, vmax=1) 1401 plt.set_cmap('RdBu') 1402 plt.colorbar() 1403 plt.draw() 1404 1405 if correlation is True: 1406 return corr 1407 else: 1408 return cov 1409 1410 1411def _smooth_eigenvalues(corr, E): 1412 """Eigenvalue smoothing as described in hep-lat/9412087 1413 1414 corr : np.ndarray 1415 correlation matrix 1416 E : integer 1417 Number of eigenvalues to be left substantially unchanged 1418 """ 1419 if not (2 < E < corr.shape[0] - 1): 1420 raise Exception(f"'E' has to be between 2 and the dimension of the correlation matrix minus 1 ({corr.shape[0] - 1}).") 1421 vals, vec = np.linalg.eigh(corr) 1422 lambda_min = np.mean(vals[:-E]) 1423 vals[vals < lambda_min] = lambda_min 1424 vals /= np.mean(vals) 1425 return vec @ np.diag(vals) @ vec.T 1426 1427 1428def _covariance_element(obs1, obs2): 1429 """Estimates the covariance of two Obs objects, neglecting autocorrelations.""" 1430 1431 def calc_gamma(deltas1, deltas2, idx1, idx2, new_idx): 1432 deltas1 = _expand_deltas_for_merge(deltas1, idx1, len(idx1), new_idx) 1433 deltas2 = _expand_deltas_for_merge(deltas2, idx2, len(idx2), new_idx) 1434 return np.sum(deltas1 * deltas2) 1435 1436 if set(obs1.names).isdisjoint(set(obs2.names)): 1437 return 0.0 1438 1439 if not hasattr(obs1, 'e_dvalue') or not hasattr(obs2, 'e_dvalue'): 1440 raise Exception('The gamma method has to be applied to both Obs first.') 1441 1442 dvalue = 0.0 1443 1444 for e_name in obs1.mc_names: 1445 1446 if e_name not in obs2.mc_names: 1447 continue 1448 1449 idl_d = {} 1450 for r_name in obs1.e_content[e_name]: 1451 if r_name not in obs2.e_content[e_name]: 1452 continue 1453 idl_d[r_name] = _merge_idx([obs1.idl[r_name], obs2.idl[r_name]]) 1454 1455 gamma = 0.0 1456 1457 for r_name in obs1.e_content[e_name]: 1458 if r_name not in obs2.e_content[e_name]: 1459 continue 1460 gamma += calc_gamma(obs1.deltas[r_name], obs2.deltas[r_name], obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1461 1462 if gamma == 0.0: 1463 continue 1464 1465 gamma_div = 0.0 1466 e_N = 0 1467 for r_name in obs1.e_content[e_name]: 1468 if r_name not in obs2.e_content[e_name]: 1469 continue 1470 gamma_div += calc_gamma(np.ones(obs1.shape[r_name]), np.ones(obs2.shape[r_name]), obs1.idl[r_name], obs2.idl[r_name], idl_d[r_name]) 1471 e_N += len(idl_d[r_name]) 1472 gamma /= max(gamma_div, 1.0) 1473 1474 # Bias correction hep-lat/0306017 eq. (49) 1475 dvalue += (1 + 1 / e_N) * gamma / e_N 1476 1477 for e_name in obs1.cov_names: 1478 1479 if e_name not in obs2.cov_names: 1480 continue 1481 1482 dvalue += float(np.dot(np.transpose(obs1.covobs[e_name].grad), np.dot(obs1.covobs[e_name].cov, obs2.covobs[e_name].grad))) 1483 1484 return dvalue 1485 1486 1487def import_jackknife(jacks, name, idl=None): 1488 """Imports jackknife samples and returns an Obs 1489 1490 Parameters 1491 ---------- 1492 jacks : numpy.ndarray 1493 numpy array containing the mean value as zeroth entry and 1494 the N jackknife samples as first to Nth entry. 1495 name : str 1496 name of the ensemble the samples are defined on. 1497 """ 1498 length = len(jacks) - 1 1499 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1500 samples = jacks[1:] @ prj 1501 mean = np.mean(samples) 1502 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1503 new_obs._value = jacks[0] 1504 return new_obs 1505 1506 1507def merge_obs(list_of_obs): 1508 """Combine all observables in list_of_obs into one new observable 1509 1510 Parameters 1511 ---------- 1512 list_of_obs : list 1513 list of the Obs object to be combined 1514 1515 Notes 1516 ----- 1517 It is not possible to combine obs which are based on the same replicum 1518 """ 1519 replist = [item for obs in list_of_obs for item in obs.names] 1520 if (len(replist) == len(set(replist))) is False: 1521 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1522 if any([len(o.cov_names) for o in list_of_obs]): 1523 raise Exception('Not possible to merge data that contains covobs!') 1524 new_dict = {} 1525 idl_dict = {} 1526 for o in list_of_obs: 1527 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1528 for key in set(o.deltas) | set(o.r_values)}) 1529 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1530 1531 names = sorted(new_dict.keys()) 1532 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1533 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1534 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1535 return o 1536 1537 1538def cov_Obs(means, cov, name, grad=None): 1539 """Create an Obs based on mean(s) and a covariance matrix 1540 1541 Parameters 1542 ---------- 1543 mean : list of floats or float 1544 N mean value(s) of the new Obs 1545 cov : list or array 1546 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1547 name : str 1548 identifier for the covariance matrix 1549 grad : list or array 1550 Gradient of the Covobs wrt. the means belonging to cov. 1551 """ 1552 1553 def covobs_to_obs(co): 1554 """Make an Obs out of a Covobs 1555 1556 Parameters 1557 ---------- 1558 co : Covobs 1559 Covobs to be embedded into the Obs 1560 """ 1561 o = Obs([], [], means=[]) 1562 o._value = co.value 1563 o.names.append(co.name) 1564 o._covobs[co.name] = co 1565 o._dvalue = np.sqrt(co.errsq()) 1566 return o 1567 1568 ol = [] 1569 if isinstance(means, (float, int)): 1570 means = [means] 1571 1572 for i in range(len(means)): 1573 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1574 if ol[0].covobs[name].N != len(means): 1575 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1576 if len(ol) == 1: 1577 return ol[0] 1578 return ol
14class Obs: 15 """Class for a general observable. 16 17 Instances of Obs are the basic objects of a pyerrors error analysis. 18 They are initialized with a list which contains arrays of samples for 19 different ensembles/replica and another list of same length which contains 20 the names of the ensembles/replica. Mathematical operations can be 21 performed on instances. The result is another instance of Obs. The error of 22 an instance can be computed with the gamma_method. Also contains additional 23 methods for output and visualization of the error calculation. 24 25 Attributes 26 ---------- 27 S_global : float 28 Standard value for S (default 2.0) 29 S_dict : dict 30 Dictionary for S values. If an entry for a given ensemble 31 exists this overwrites the standard value for that ensemble. 32 tau_exp_global : float 33 Standard value for tau_exp (default 0.0) 34 tau_exp_dict : dict 35 Dictionary for tau_exp values. If an entry for a given ensemble exists 36 this overwrites the standard value for that ensemble. 37 N_sigma_global : float 38 Standard value for N_sigma (default 1.0) 39 N_sigma_dict : dict 40 Dictionary for N_sigma values. If an entry for a given ensemble exists 41 this overwrites the standard value for that ensemble. 42 """ 43 __slots__ = ['names', 'shape', 'r_values', 'deltas', 'N', '_value', '_dvalue', 44 'ddvalue', 'reweighted', 'S', 'tau_exp', 'N_sigma', 45 'e_dvalue', 'e_ddvalue', 'e_tauint', 'e_dtauint', 46 'e_windowsize', 'e_rho', 'e_drho', 'e_n_tauint', 'e_n_dtauint', 47 'idl', 'is_merged', 'tag', '_covobs', '__dict__'] 48 49 S_global = 2.0 50 S_dict = {} 51 tau_exp_global = 0.0 52 tau_exp_dict = {} 53 N_sigma_global = 1.0 54 N_sigma_dict = {} 55 filter_eps = 1e-10 56 57 def __init__(self, samples, names, idl=None, **kwargs): 58 """ Initialize Obs object. 59 60 Parameters 61 ---------- 62 samples : list 63 list of numpy arrays containing the Monte Carlo samples 64 names : list 65 list of strings labeling the individual samples 66 idl : list, optional 67 list of ranges or lists on which the samples are defined 68 """ 69 70 if kwargs.get("means") is None and len(samples): 71 if len(samples) != len(names): 72 raise Exception('Length of samples and names incompatible.') 73 if idl is not None: 74 if len(idl) != len(names): 75 raise Exception('Length of idl incompatible with samples and names.') 76 name_length = len(names) 77 if name_length > 1: 78 if name_length != len(set(names)): 79 raise Exception('names are not unique.') 80 if not all(isinstance(x, str) for x in names): 81 raise TypeError('All names have to be strings.') 82 else: 83 if not isinstance(names[0], str): 84 raise TypeError('All names have to be strings.') 85 if min(len(x) for x in samples) <= 4: 86 raise Exception('Samples have to have at least 5 entries.') 87 88 self.names = sorted(names) 89 self.shape = {} 90 self.r_values = {} 91 self.deltas = {} 92 self._covobs = {} 93 94 self._value = 0 95 self.N = 0 96 self.is_merged = {} 97 self.idl = {} 98 if idl is not None: 99 for name, idx in sorted(zip(names, idl)): 100 if isinstance(idx, range): 101 self.idl[name] = idx 102 elif isinstance(idx, (list, np.ndarray)): 103 dc = np.unique(np.diff(idx)) 104 if np.any(dc < 0): 105 raise Exception("Unsorted idx for idl[%s]" % (name)) 106 if len(dc) == 1: 107 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 108 else: 109 self.idl[name] = list(idx) 110 else: 111 raise Exception('incompatible type for idl[%s].' % (name)) 112 else: 113 for name, sample in sorted(zip(names, samples)): 114 self.idl[name] = range(1, len(sample) + 1) 115 116 if kwargs.get("means") is not None: 117 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 118 self.shape[name] = len(self.idl[name]) 119 self.N += self.shape[name] 120 self.r_values[name] = mean 121 self.deltas[name] = sample 122 else: 123 for name, sample in sorted(zip(names, samples)): 124 self.shape[name] = len(self.idl[name]) 125 self.N += self.shape[name] 126 if len(sample) != self.shape[name]: 127 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 128 self.r_values[name] = np.mean(sample) 129 self.deltas[name] = sample - self.r_values[name] 130 self._value += self.shape[name] * self.r_values[name] 131 self._value /= self.N 132 133 self._dvalue = 0.0 134 self.ddvalue = 0.0 135 self.reweighted = False 136 137 self.tag = None 138 139 @property 140 def value(self): 141 return self._value 142 143 @property 144 def dvalue(self): 145 return self._dvalue 146 147 @property 148 def e_names(self): 149 return sorted(set([o.split('|')[0] for o in self.names])) 150 151 @property 152 def cov_names(self): 153 return sorted(set([o for o in self.covobs.keys()])) 154 155 @property 156 def mc_names(self): 157 return sorted(set([o.split('|')[0] for o in self.names if o not in self.cov_names])) 158 159 @property 160 def e_content(self): 161 res = {} 162 for e, e_name in enumerate(self.e_names): 163 res[e_name] = sorted(filter(lambda x: x.startswith(e_name + '|'), self.names)) 164 if e_name in self.names: 165 res[e_name].append(e_name) 166 return res 167 168 @property 169 def covobs(self): 170 return self._covobs 171 172 def gamma_method(self, **kwargs): 173 """Estimate the error and related properties of the Obs. 174 175 Parameters 176 ---------- 177 S : float 178 specifies a custom value for the parameter S (default 2.0). 179 If set to 0 it is assumed that the data exhibits no 180 autocorrelation. In this case the error estimates coincides 181 with the sample standard error. 182 tau_exp : float 183 positive value triggers the critical slowing down analysis 184 (default 0.0). 185 N_sigma : float 186 number of standard deviations from zero until the tail is 187 attached to the autocorrelation function (default 1). 188 fft : bool 189 determines whether the fft algorithm is used for the computation 190 of the autocorrelation function (default True) 191 """ 192 193 e_content = self.e_content 194 self.e_dvalue = {} 195 self.e_ddvalue = {} 196 self.e_tauint = {} 197 self.e_dtauint = {} 198 self.e_windowsize = {} 199 self.e_n_tauint = {} 200 self.e_n_dtauint = {} 201 e_gamma = {} 202 self.e_rho = {} 203 self.e_drho = {} 204 self._dvalue = 0 205 self.ddvalue = 0 206 207 self.S = {} 208 self.tau_exp = {} 209 self.N_sigma = {} 210 211 if kwargs.get('fft') is False: 212 fft = False 213 else: 214 fft = True 215 216 def _parse_kwarg(kwarg_name): 217 if kwarg_name in kwargs: 218 tmp = kwargs.get(kwarg_name) 219 if isinstance(tmp, (int, float)): 220 if tmp < 0: 221 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 222 for e, e_name in enumerate(self.e_names): 223 getattr(self, kwarg_name)[e_name] = tmp 224 else: 225 raise TypeError(kwarg_name + ' is not in proper format.') 226 else: 227 for e, e_name in enumerate(self.e_names): 228 if e_name in getattr(Obs, kwarg_name + '_dict'): 229 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 230 else: 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 232 233 _parse_kwarg('S') 234 _parse_kwarg('tau_exp') 235 _parse_kwarg('N_sigma') 236 237 for e, e_name in enumerate(self.mc_names): 238 r_length = [] 239 for r_name in e_content[e_name]: 240 if isinstance(self.idl[r_name], range): 241 r_length.append(len(self.idl[r_name])) 242 else: 243 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 244 245 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 246 w_max = max(r_length) // 2 247 e_gamma[e_name] = np.zeros(w_max) 248 self.e_rho[e_name] = np.zeros(w_max) 249 self.e_drho[e_name] = np.zeros(w_max) 250 251 for r_name in e_content[e_name]: 252 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 253 254 gamma_div = np.zeros(w_max) 255 for r_name in e_content[e_name]: 256 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 257 gamma_div[gamma_div < 1] = 1.0 258 e_gamma[e_name] /= gamma_div[:w_max] 259 260 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 261 self.e_tauint[e_name] = 0.5 262 self.e_dtauint[e_name] = 0.0 263 self.e_dvalue[e_name] = 0.0 264 self.e_ddvalue[e_name] = 0.0 265 self.e_windowsize[e_name] = 0 266 continue 267 268 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 269 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 270 # Make sure no entry of tauint is smaller than 0.5 271 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 272 # hep-lat/0306017 eq. (42) 273 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 274 self.e_n_dtauint[e_name][0] = 0.0 275 276 def _compute_drho(i): 277 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 278 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 279 280 _compute_drho(1) 281 if self.tau_exp[e_name] > 0: 282 texp = self.tau_exp[e_name] 283 # Critical slowing down analysis 284 if w_max // 2 <= 1: 285 raise Exception("Need at least 8 samples for tau_exp error analysis") 286 for n in range(1, w_max // 2): 287 _compute_drho(n + 1) 288 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 289 # Bias correction hep-lat/0306017 eq. (49) included 290 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 291 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 292 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 293 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 294 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 295 self.e_windowsize[e_name] = n 296 break 297 else: 298 if self.S[e_name] == 0.0: 299 self.e_tauint[e_name] = 0.5 300 self.e_dtauint[e_name] = 0.0 301 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 302 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 303 self.e_windowsize[e_name] = 0 304 else: 305 # Standard automatic windowing procedure 306 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 307 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 308 for n in range(1, w_max): 309 if n < w_max // 2 - 2: 310 _compute_drho(n + 1) 311 if g_w[n - 1] < 0 or n >= w_max - 1: 312 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 313 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 314 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 315 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 316 self.e_windowsize[e_name] = n 317 break 318 319 self._dvalue += self.e_dvalue[e_name] ** 2 320 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 321 322 for e_name in self.cov_names: 323 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 324 self.e_ddvalue[e_name] = 0 325 self._dvalue += self.e_dvalue[e_name]**2 326 327 self._dvalue = np.sqrt(self._dvalue) 328 if self._dvalue == 0.0: 329 self.ddvalue = 0.0 330 else: 331 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 332 return 333 334 def _calc_gamma(self, deltas, idx, shape, w_max, fft): 335 """Calculate Gamma_{AA} from the deltas, which are defined on idx. 336 idx is assumed to be a contiguous range (possibly with a stepsize != 1) 337 338 Parameters 339 ---------- 340 deltas : list 341 List of fluctuations 342 idx : list 343 List or range of configurations on which the deltas are defined. 344 shape : int 345 Number of configurations in idx. 346 w_max : int 347 Upper bound for the summation window. 348 fft : bool 349 determines whether the fft algorithm is used for the computation 350 of the autocorrelation function. 351 """ 352 gamma = np.zeros(w_max) 353 deltas = _expand_deltas(deltas, idx, shape) 354 new_shape = len(deltas) 355 if fft: 356 max_gamma = min(new_shape, w_max) 357 # The padding for the fft has to be even 358 padding = new_shape + max_gamma + (new_shape + max_gamma) % 2 359 gamma[:max_gamma] += np.fft.irfft(np.abs(np.fft.rfft(deltas, padding)) ** 2)[:max_gamma] 360 else: 361 for n in range(w_max): 362 if new_shape - n >= 0: 363 gamma[n] += deltas[0:new_shape - n].dot(deltas[n:new_shape]) 364 365 return gamma 366 367 def details(self, ens_content=True): 368 """Output detailed properties of the Obs. 369 370 Parameters 371 ---------- 372 ens_content : bool 373 print details about the ensembles and replica if true. 374 """ 375 if self.tag is not None: 376 print("Description:", self.tag) 377 if not hasattr(self, 'e_dvalue'): 378 print('Result\t %3.8e' % (self.value)) 379 else: 380 if self.value == 0.0: 381 percentage = np.nan 382 else: 383 percentage = np.abs(self._dvalue / self.value) * 100 384 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 385 if len(self.e_names) > 1: 386 print(' Ensemble errors:') 387 for e_name in self.mc_names: 388 if len(self.e_names) > 1: 389 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 390 if self.tau_exp[e_name] > 0: 391 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 392 else: 393 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 394 for e_name in self.cov_names: 395 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 396 if ens_content is True: 397 if len(self.e_names) == 1: 398 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 399 else: 400 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 401 my_string_list = [] 402 for key, value in sorted(self.e_content.items()): 403 if key not in self.covobs: 404 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 405 if len(value) == 1: 406 my_string += f': {self.shape[value[0]]} configurations' 407 if isinstance(self.idl[value[0]], range): 408 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 409 else: 410 my_string += ' (irregular range)' 411 else: 412 sublist = [] 413 for v in value: 414 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 415 my_substring += f': {self.shape[v]} configurations' 416 if isinstance(self.idl[v], range): 417 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 418 else: 419 my_substring += ' (irregular range)' 420 sublist.append(my_substring) 421 422 my_string += '\n' + '\n'.join(sublist) 423 else: 424 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 425 my_string_list.append(my_string) 426 print('\n'.join(my_string_list)) 427 428 def is_zero_within_error(self, sigma=1): 429 """Checks whether the observable is zero within 'sigma' standard errors. 430 431 Parameters 432 ---------- 433 sigma : int 434 Number of standard errors used for the check. 435 436 Works only properly when the gamma method was run. 437 """ 438 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue 439 440 def is_zero(self, atol=1e-10): 441 """Checks whether the observable is zero within a given tolerance. 442 443 Parameters 444 ---------- 445 atol : float 446 Absolute tolerance (for details see numpy documentation). 447 """ 448 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values()) 449 450 def plot_tauint(self, save=None): 451 """Plot integrated autocorrelation time for each ensemble. 452 453 Parameters 454 ---------- 455 save : str 456 saves the figure to a file named 'save' if. 457 """ 458 if not hasattr(self, 'e_dvalue'): 459 raise Exception('Run the gamma method first.') 460 461 for e, e_name in enumerate(self.mc_names): 462 fig = plt.figure() 463 plt.xlabel(r'$W$') 464 plt.ylabel(r'$\tau_\mathrm{int}$') 465 length = int(len(self.e_n_tauint[e_name])) 466 if self.tau_exp[e_name] > 0: 467 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 468 x_help = np.arange(2 * self.tau_exp[e_name]) 469 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 470 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 471 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 472 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 473 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 474 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 475 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 476 else: 477 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 478 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 479 480 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 481 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 482 plt.legend() 483 plt.xlim(-0.5, xmax) 484 ylim = plt.ylim() 485 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 486 plt.draw() 487 if save: 488 fig.savefig(save + "_" + str(e)) 489 490 def plot_rho(self, save=None): 491 """Plot normalized autocorrelation function time for each ensemble. 492 493 Parameters 494 ---------- 495 save : str 496 saves the figure to a file named 'save' if. 497 """ 498 if not hasattr(self, 'e_dvalue'): 499 raise Exception('Run the gamma method first.') 500 for e, e_name in enumerate(self.mc_names): 501 fig = plt.figure() 502 plt.xlabel('W') 503 plt.ylabel('rho') 504 length = int(len(self.e_drho[e_name])) 505 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 506 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 507 if self.tau_exp[e_name] > 0: 508 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 509 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 510 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 511 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 512 else: 513 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 514 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 515 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 516 plt.xlim(-0.5, xmax) 517 plt.draw() 518 if save: 519 fig.savefig(save + "_" + str(e)) 520 521 def plot_rep_dist(self): 522 """Plot replica distribution for each ensemble with more than one replicum.""" 523 if not hasattr(self, 'e_dvalue'): 524 raise Exception('Run the gamma method first.') 525 for e, e_name in enumerate(self.mc_names): 526 if len(self.e_content[e_name]) == 1: 527 print('No replica distribution for a single replicum (', e_name, ')') 528 continue 529 r_length = [] 530 sub_r_mean = 0 531 for r, r_name in enumerate(self.e_content[e_name]): 532 r_length.append(len(self.deltas[r_name])) 533 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 534 e_N = np.sum(r_length) 535 sub_r_mean /= e_N 536 arr = np.zeros(len(self.e_content[e_name])) 537 for r, r_name in enumerate(self.e_content[e_name]): 538 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 539 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 540 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 541 plt.draw() 542 543 def plot_history(self, expand=True): 544 """Plot derived Monte Carlo history for each ensemble 545 546 Parameters 547 ---------- 548 expand : bool 549 show expanded history for irregular Monte Carlo chains (default: True). 550 """ 551 for e, e_name in enumerate(self.mc_names): 552 plt.figure() 553 r_length = [] 554 tmp = [] 555 tmp_expanded = [] 556 for r, r_name in enumerate(self.e_content[e_name]): 557 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 558 if expand: 559 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 560 r_length.append(len(tmp_expanded[-1])) 561 else: 562 r_length.append(len(tmp[-1])) 563 e_N = np.sum(r_length) 564 x = np.arange(e_N) 565 y_test = np.concatenate(tmp, axis=0) 566 if expand: 567 y = np.concatenate(tmp_expanded, axis=0) 568 else: 569 y = y_test 570 plt.errorbar(x, y, fmt='.', markersize=3) 571 plt.xlim(-0.5, e_N - 0.5) 572 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 573 plt.draw() 574 575 def plot_piechart(self, save=None): 576 """Plot piechart which shows the fractional contribution of each 577 ensemble to the error and returns a dictionary containing the fractions. 578 579 Parameters 580 ---------- 581 save : str 582 saves the figure to a file named 'save' if. 583 """ 584 if not hasattr(self, 'e_dvalue'): 585 raise Exception('Run the gamma method first.') 586 if np.isclose(0.0, self._dvalue, atol=1e-15): 587 raise Exception('Error is 0.0') 588 labels = self.e_names 589 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 590 fig1, ax1 = plt.subplots() 591 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 592 ax1.axis('equal') 593 plt.draw() 594 if save: 595 fig1.savefig(save) 596 597 return dict(zip(self.e_names, sizes)) 598 599 def dump(self, filename, datatype="json.gz", description="", **kwargs): 600 """Dump the Obs to a file 'name' of chosen format. 601 602 Parameters 603 ---------- 604 filename : str 605 name of the file to be saved. 606 datatype : str 607 Format of the exported file. Supported formats include 608 "json.gz" and "pickle" 609 description : str 610 Description for output file, only relevant for json.gz format. 611 path : str 612 specifies a custom path for the file (default '.') 613 """ 614 if 'path' in kwargs: 615 file_name = kwargs.get('path') + '/' + filename 616 else: 617 file_name = filename 618 619 if datatype == "json.gz": 620 from .input.json import dump_to_json 621 dump_to_json([self], file_name, description=description) 622 elif datatype == "pickle": 623 with open(file_name + '.p', 'wb') as fb: 624 pickle.dump(self, fb) 625 else: 626 raise Exception("Unknown datatype " + str(datatype)) 627 628 def export_jackknife(self): 629 """Export jackknife samples from the Obs 630 631 Returns 632 ------- 633 numpy.ndarray 634 Returns a numpy array of length N + 1 where N is the number of samples 635 for the given ensemble and replicum. The zeroth entry of the array contains 636 the mean value of the Obs, entries 1 to N contain the N jackknife samples 637 derived from the Obs. The current implementation only works for observables 638 defined on exactly one ensemble and replicum. The derived jackknife samples 639 should agree with samples from a full jackknife analysis up to O(1/N). 640 """ 641 642 if len(self.names) != 1: 643 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 644 645 name = self.names[0] 646 full_data = self.deltas[name] + self.r_values[name] 647 n = full_data.size 648 mean = self.value 649 tmp_jacks = np.zeros(n + 1) 650 tmp_jacks[0] = mean 651 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 652 return tmp_jacks 653 654 def __float__(self): 655 return float(self.value) 656 657 def __repr__(self): 658 return 'Obs[' + str(self) + ']' 659 660 def __str__(self): 661 if self._dvalue == 0.0: 662 return str(self.value) 663 fexp = np.floor(np.log10(self._dvalue)) 664 if fexp < 0.0: 665 return '{:{form}}({:2.0f})'.format(self.value, self._dvalue * 10 ** (-fexp + 1), form='.' + str(-int(fexp) + 1) + 'f') 666 elif fexp == 0.0: 667 return '{:.1f}({:1.1f})'.format(self.value, self._dvalue) 668 else: 669 return '{:.0f}({:2.0f})'.format(self.value, self._dvalue) 670 671 # Overload comparisons 672 def __lt__(self, other): 673 return self.value < other 674 675 def __le__(self, other): 676 return self.value <= other 677 678 def __gt__(self, other): 679 return self.value > other 680 681 def __ge__(self, other): 682 return self.value >= other 683 684 def __eq__(self, other): 685 return (self - other).is_zero() 686 687 def __ne__(self, other): 688 return not (self - other).is_zero() 689 690 # Overload math operations 691 def __add__(self, y): 692 if isinstance(y, Obs): 693 return derived_observable(lambda x, **kwargs: x[0] + x[1], [self, y], man_grad=[1, 1]) 694 else: 695 if isinstance(y, np.ndarray): 696 return np.array([self + o for o in y]) 697 elif y.__class__.__name__ in ['Corr', 'CObs']: 698 return NotImplemented 699 else: 700 return derived_observable(lambda x, **kwargs: x[0] + y, [self], man_grad=[1]) 701 702 def __radd__(self, y): 703 return self + y 704 705 def __mul__(self, y): 706 if isinstance(y, Obs): 707 return derived_observable(lambda x, **kwargs: x[0] * x[1], [self, y], man_grad=[y.value, self.value]) 708 else: 709 if isinstance(y, np.ndarray): 710 return np.array([self * o for o in y]) 711 elif isinstance(y, complex): 712 return CObs(self * y.real, self * y.imag) 713 elif y.__class__.__name__ in ['Corr', 'CObs']: 714 return NotImplemented 715 else: 716 return derived_observable(lambda x, **kwargs: x[0] * y, [self], man_grad=[y]) 717 718 def __rmul__(self, y): 719 return self * y 720 721 def __sub__(self, y): 722 if isinstance(y, Obs): 723 return derived_observable(lambda x, **kwargs: x[0] - x[1], [self, y], man_grad=[1, -1]) 724 else: 725 if isinstance(y, np.ndarray): 726 return np.array([self - o for o in y]) 727 elif y.__class__.__name__ in ['Corr', 'CObs']: 728 return NotImplemented 729 else: 730 return derived_observable(lambda x, **kwargs: x[0] - y, [self], man_grad=[1]) 731 732 def __rsub__(self, y): 733 return -1 * (self - y) 734 735 def __pos__(self): 736 return self 737 738 def __neg__(self): 739 return -1 * self 740 741 def __truediv__(self, y): 742 if isinstance(y, Obs): 743 return derived_observable(lambda x, **kwargs: x[0] / x[1], [self, y], man_grad=[1 / y.value, - self.value / y.value ** 2]) 744 else: 745 if isinstance(y, np.ndarray): 746 return np.array([self / o for o in y]) 747 elif y.__class__.__name__ in ['Corr', 'CObs']: 748 return NotImplemented 749 else: 750 return derived_observable(lambda x, **kwargs: x[0] / y, [self], man_grad=[1 / y]) 751 752 def __rtruediv__(self, y): 753 if isinstance(y, Obs): 754 return derived_observable(lambda x, **kwargs: x[0] / x[1], [y, self], man_grad=[1 / self.value, - y.value / self.value ** 2]) 755 else: 756 if isinstance(y, np.ndarray): 757 return np.array([o / self for o in y]) 758 elif y.__class__.__name__ in ['Corr', 'CObs']: 759 return NotImplemented 760 else: 761 return derived_observable(lambda x, **kwargs: y / x[0], [self], man_grad=[-y / self.value ** 2]) 762 763 def __pow__(self, y): 764 if isinstance(y, Obs): 765 return derived_observable(lambda x: x[0] ** x[1], [self, y]) 766 else: 767 return derived_observable(lambda x: x[0] ** y, [self]) 768 769 def __rpow__(self, y): 770 if isinstance(y, Obs): 771 return derived_observable(lambda x: x[0] ** x[1], [y, self]) 772 else: 773 return derived_observable(lambda x: y ** x[0], [self]) 774 775 def __abs__(self): 776 return derived_observable(lambda x: anp.abs(x[0]), [self]) 777 778 # Overload numpy functions 779 def sqrt(self): 780 return derived_observable(lambda x, **kwargs: np.sqrt(x[0]), [self], man_grad=[1 / 2 / np.sqrt(self.value)]) 781 782 def log(self): 783 return derived_observable(lambda x, **kwargs: np.log(x[0]), [self], man_grad=[1 / self.value]) 784 785 def exp(self): 786 return derived_observable(lambda x, **kwargs: np.exp(x[0]), [self], man_grad=[np.exp(self.value)]) 787 788 def sin(self): 789 return derived_observable(lambda x, **kwargs: np.sin(x[0]), [self], man_grad=[np.cos(self.value)]) 790 791 def cos(self): 792 return derived_observable(lambda x, **kwargs: np.cos(x[0]), [self], man_grad=[-np.sin(self.value)]) 793 794 def tan(self): 795 return derived_observable(lambda x, **kwargs: np.tan(x[0]), [self], man_grad=[1 / np.cos(self.value) ** 2]) 796 797 def arcsin(self): 798 return derived_observable(lambda x: anp.arcsin(x[0]), [self]) 799 800 def arccos(self): 801 return derived_observable(lambda x: anp.arccos(x[0]), [self]) 802 803 def arctan(self): 804 return derived_observable(lambda x: anp.arctan(x[0]), [self]) 805 806 def sinh(self): 807 return derived_observable(lambda x, **kwargs: np.sinh(x[0]), [self], man_grad=[np.cosh(self.value)]) 808 809 def cosh(self): 810 return derived_observable(lambda x, **kwargs: np.cosh(x[0]), [self], man_grad=[np.sinh(self.value)]) 811 812 def tanh(self): 813 return derived_observable(lambda x, **kwargs: np.tanh(x[0]), [self], man_grad=[1 / np.cosh(self.value) ** 2]) 814 815 def arcsinh(self): 816 return derived_observable(lambda x: anp.arcsinh(x[0]), [self]) 817 818 def arccosh(self): 819 return derived_observable(lambda x: anp.arccosh(x[0]), [self]) 820 821 def arctanh(self): 822 return derived_observable(lambda x: anp.arctanh(x[0]), [self])
Class for a general observable.
Instances of Obs are the basic objects of a pyerrors error analysis. They are initialized with a list which contains arrays of samples for different ensembles/replica and another list of same length which contains the names of the ensembles/replica. Mathematical operations can be performed on instances. The result is another instance of Obs. The error of an instance can be computed with the gamma_method. Also contains additional methods for output and visualization of the error calculation.
Attributes
- S_global (float): Standard value for S (default 2.0)
- S_dict (dict): Dictionary for S values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- tau_exp_global (float): Standard value for tau_exp (default 0.0)
- tau_exp_dict (dict): Dictionary for tau_exp values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
- N_sigma_global (float): Standard value for N_sigma (default 1.0)
- N_sigma_dict (dict): Dictionary for N_sigma values. If an entry for a given ensemble exists this overwrites the standard value for that ensemble.
57 def __init__(self, samples, names, idl=None, **kwargs): 58 """ Initialize Obs object. 59 60 Parameters 61 ---------- 62 samples : list 63 list of numpy arrays containing the Monte Carlo samples 64 names : list 65 list of strings labeling the individual samples 66 idl : list, optional 67 list of ranges or lists on which the samples are defined 68 """ 69 70 if kwargs.get("means") is None and len(samples): 71 if len(samples) != len(names): 72 raise Exception('Length of samples and names incompatible.') 73 if idl is not None: 74 if len(idl) != len(names): 75 raise Exception('Length of idl incompatible with samples and names.') 76 name_length = len(names) 77 if name_length > 1: 78 if name_length != len(set(names)): 79 raise Exception('names are not unique.') 80 if not all(isinstance(x, str) for x in names): 81 raise TypeError('All names have to be strings.') 82 else: 83 if not isinstance(names[0], str): 84 raise TypeError('All names have to be strings.') 85 if min(len(x) for x in samples) <= 4: 86 raise Exception('Samples have to have at least 5 entries.') 87 88 self.names = sorted(names) 89 self.shape = {} 90 self.r_values = {} 91 self.deltas = {} 92 self._covobs = {} 93 94 self._value = 0 95 self.N = 0 96 self.is_merged = {} 97 self.idl = {} 98 if idl is not None: 99 for name, idx in sorted(zip(names, idl)): 100 if isinstance(idx, range): 101 self.idl[name] = idx 102 elif isinstance(idx, (list, np.ndarray)): 103 dc = np.unique(np.diff(idx)) 104 if np.any(dc < 0): 105 raise Exception("Unsorted idx for idl[%s]" % (name)) 106 if len(dc) == 1: 107 self.idl[name] = range(idx[0], idx[-1] + dc[0], dc[0]) 108 else: 109 self.idl[name] = list(idx) 110 else: 111 raise Exception('incompatible type for idl[%s].' % (name)) 112 else: 113 for name, sample in sorted(zip(names, samples)): 114 self.idl[name] = range(1, len(sample) + 1) 115 116 if kwargs.get("means") is not None: 117 for name, sample, mean in sorted(zip(names, samples, kwargs.get("means"))): 118 self.shape[name] = len(self.idl[name]) 119 self.N += self.shape[name] 120 self.r_values[name] = mean 121 self.deltas[name] = sample 122 else: 123 for name, sample in sorted(zip(names, samples)): 124 self.shape[name] = len(self.idl[name]) 125 self.N += self.shape[name] 126 if len(sample) != self.shape[name]: 127 raise Exception('Incompatible samples and idx for %s: %d vs. %d' % (name, len(sample), self.shape[name])) 128 self.r_values[name] = np.mean(sample) 129 self.deltas[name] = sample - self.r_values[name] 130 self._value += self.shape[name] * self.r_values[name] 131 self._value /= self.N 132 133 self._dvalue = 0.0 134 self.ddvalue = 0.0 135 self.reweighted = False 136 137 self.tag = None
Initialize Obs object.
Parameters
- samples (list): list of numpy arrays containing the Monte Carlo samples
- names (list): list of strings labeling the individual samples
- idl (list, optional): list of ranges or lists on which the samples are defined
172 def gamma_method(self, **kwargs): 173 """Estimate the error and related properties of the Obs. 174 175 Parameters 176 ---------- 177 S : float 178 specifies a custom value for the parameter S (default 2.0). 179 If set to 0 it is assumed that the data exhibits no 180 autocorrelation. In this case the error estimates coincides 181 with the sample standard error. 182 tau_exp : float 183 positive value triggers the critical slowing down analysis 184 (default 0.0). 185 N_sigma : float 186 number of standard deviations from zero until the tail is 187 attached to the autocorrelation function (default 1). 188 fft : bool 189 determines whether the fft algorithm is used for the computation 190 of the autocorrelation function (default True) 191 """ 192 193 e_content = self.e_content 194 self.e_dvalue = {} 195 self.e_ddvalue = {} 196 self.e_tauint = {} 197 self.e_dtauint = {} 198 self.e_windowsize = {} 199 self.e_n_tauint = {} 200 self.e_n_dtauint = {} 201 e_gamma = {} 202 self.e_rho = {} 203 self.e_drho = {} 204 self._dvalue = 0 205 self.ddvalue = 0 206 207 self.S = {} 208 self.tau_exp = {} 209 self.N_sigma = {} 210 211 if kwargs.get('fft') is False: 212 fft = False 213 else: 214 fft = True 215 216 def _parse_kwarg(kwarg_name): 217 if kwarg_name in kwargs: 218 tmp = kwargs.get(kwarg_name) 219 if isinstance(tmp, (int, float)): 220 if tmp < 0: 221 raise Exception(kwarg_name + ' has to be larger or equal to 0.') 222 for e, e_name in enumerate(self.e_names): 223 getattr(self, kwarg_name)[e_name] = tmp 224 else: 225 raise TypeError(kwarg_name + ' is not in proper format.') 226 else: 227 for e, e_name in enumerate(self.e_names): 228 if e_name in getattr(Obs, kwarg_name + '_dict'): 229 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_dict')[e_name] 230 else: 231 getattr(self, kwarg_name)[e_name] = getattr(Obs, kwarg_name + '_global') 232 233 _parse_kwarg('S') 234 _parse_kwarg('tau_exp') 235 _parse_kwarg('N_sigma') 236 237 for e, e_name in enumerate(self.mc_names): 238 r_length = [] 239 for r_name in e_content[e_name]: 240 if isinstance(self.idl[r_name], range): 241 r_length.append(len(self.idl[r_name])) 242 else: 243 r_length.append((self.idl[r_name][-1] - self.idl[r_name][0] + 1)) 244 245 e_N = np.sum([self.shape[r_name] for r_name in e_content[e_name]]) 246 w_max = max(r_length) // 2 247 e_gamma[e_name] = np.zeros(w_max) 248 self.e_rho[e_name] = np.zeros(w_max) 249 self.e_drho[e_name] = np.zeros(w_max) 250 251 for r_name in e_content[e_name]: 252 e_gamma[e_name] += self._calc_gamma(self.deltas[r_name], self.idl[r_name], self.shape[r_name], w_max, fft) 253 254 gamma_div = np.zeros(w_max) 255 for r_name in e_content[e_name]: 256 gamma_div += self._calc_gamma(np.ones((self.shape[r_name])), self.idl[r_name], self.shape[r_name], w_max, fft) 257 gamma_div[gamma_div < 1] = 1.0 258 e_gamma[e_name] /= gamma_div[:w_max] 259 260 if np.abs(e_gamma[e_name][0]) < 10 * np.finfo(float).tiny: # Prevent division by zero 261 self.e_tauint[e_name] = 0.5 262 self.e_dtauint[e_name] = 0.0 263 self.e_dvalue[e_name] = 0.0 264 self.e_ddvalue[e_name] = 0.0 265 self.e_windowsize[e_name] = 0 266 continue 267 268 self.e_rho[e_name] = e_gamma[e_name][:w_max] / e_gamma[e_name][0] 269 self.e_n_tauint[e_name] = np.cumsum(np.concatenate(([0.5], self.e_rho[e_name][1:]))) 270 # Make sure no entry of tauint is smaller than 0.5 271 self.e_n_tauint[e_name][self.e_n_tauint[e_name] <= 0.5] = 0.5 + np.finfo(np.float64).eps 272 # hep-lat/0306017 eq. (42) 273 self.e_n_dtauint[e_name] = self.e_n_tauint[e_name] * 2 * np.sqrt(np.abs(np.arange(w_max) + 0.5 - self.e_n_tauint[e_name]) / e_N) 274 self.e_n_dtauint[e_name][0] = 0.0 275 276 def _compute_drho(i): 277 tmp = self.e_rho[e_name][i + 1:w_max] + np.concatenate([self.e_rho[e_name][i - 1::-1], self.e_rho[e_name][1:w_max - 2 * i]]) - 2 * self.e_rho[e_name][i] * self.e_rho[e_name][1:w_max - i] 278 self.e_drho[e_name][i] = np.sqrt(np.sum(tmp ** 2) / e_N) 279 280 _compute_drho(1) 281 if self.tau_exp[e_name] > 0: 282 texp = self.tau_exp[e_name] 283 # Critical slowing down analysis 284 if w_max // 2 <= 1: 285 raise Exception("Need at least 8 samples for tau_exp error analysis") 286 for n in range(1, w_max // 2): 287 _compute_drho(n + 1) 288 if (self.e_rho[e_name][n] - self.N_sigma[e_name] * self.e_drho[e_name][n]) < 0 or n >= w_max // 2 - 2: 289 # Bias correction hep-lat/0306017 eq. (49) included 290 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) + texp * np.abs(self.e_rho[e_name][n + 1]) # The absolute makes sure, that the tail contribution is always positive 291 self.e_dtauint[e_name] = np.sqrt(self.e_n_dtauint[e_name][n] ** 2 + texp ** 2 * self.e_drho[e_name][n + 1] ** 2) 292 # Error of tau_exp neglected so far, missing term: self.e_rho[e_name][n + 1] ** 2 * d_tau_exp ** 2 293 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 294 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 295 self.e_windowsize[e_name] = n 296 break 297 else: 298 if self.S[e_name] == 0.0: 299 self.e_tauint[e_name] = 0.5 300 self.e_dtauint[e_name] = 0.0 301 self.e_dvalue[e_name] = np.sqrt(e_gamma[e_name][0] / (e_N - 1)) 302 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt(0.5 / e_N) 303 self.e_windowsize[e_name] = 0 304 else: 305 # Standard automatic windowing procedure 306 tau = self.S[e_name] / np.log((2 * self.e_n_tauint[e_name][1:] + 1) / (2 * self.e_n_tauint[e_name][1:] - 1)) 307 g_w = np.exp(- np.arange(1, w_max) / tau) - tau / np.sqrt(np.arange(1, w_max) * e_N) 308 for n in range(1, w_max): 309 if n < w_max // 2 - 2: 310 _compute_drho(n + 1) 311 if g_w[n - 1] < 0 or n >= w_max - 1: 312 self.e_tauint[e_name] = self.e_n_tauint[e_name][n] * (1 + (2 * n + 1) / e_N) / (1 + 1 / e_N) # Bias correction hep-lat/0306017 eq. (49) 313 self.e_dtauint[e_name] = self.e_n_dtauint[e_name][n] 314 self.e_dvalue[e_name] = np.sqrt(2 * self.e_tauint[e_name] * e_gamma[e_name][0] * (1 + 1 / e_N) / e_N) 315 self.e_ddvalue[e_name] = self.e_dvalue[e_name] * np.sqrt((n + 0.5) / e_N) 316 self.e_windowsize[e_name] = n 317 break 318 319 self._dvalue += self.e_dvalue[e_name] ** 2 320 self.ddvalue += (self.e_dvalue[e_name] * self.e_ddvalue[e_name]) ** 2 321 322 for e_name in self.cov_names: 323 self.e_dvalue[e_name] = np.sqrt(self.covobs[e_name].errsq()) 324 self.e_ddvalue[e_name] = 0 325 self._dvalue += self.e_dvalue[e_name]**2 326 327 self._dvalue = np.sqrt(self._dvalue) 328 if self._dvalue == 0.0: 329 self.ddvalue = 0.0 330 else: 331 self.ddvalue = np.sqrt(self.ddvalue) / self._dvalue 332 return
Estimate the error and related properties of the Obs.
Parameters
- S (float): specifies a custom value for the parameter S (default 2.0). If set to 0 it is assumed that the data exhibits no autocorrelation. In this case the error estimates coincides with the sample standard error.
- tau_exp (float): positive value triggers the critical slowing down analysis (default 0.0).
- N_sigma (float): number of standard deviations from zero until the tail is attached to the autocorrelation function (default 1).
- fft (bool): determines whether the fft algorithm is used for the computation of the autocorrelation function (default True)
367 def details(self, ens_content=True): 368 """Output detailed properties of the Obs. 369 370 Parameters 371 ---------- 372 ens_content : bool 373 print details about the ensembles and replica if true. 374 """ 375 if self.tag is not None: 376 print("Description:", self.tag) 377 if not hasattr(self, 'e_dvalue'): 378 print('Result\t %3.8e' % (self.value)) 379 else: 380 if self.value == 0.0: 381 percentage = np.nan 382 else: 383 percentage = np.abs(self._dvalue / self.value) * 100 384 print('Result\t %3.8e +/- %3.8e +/- %3.8e (%3.3f%%)' % (self.value, self._dvalue, self.ddvalue, percentage)) 385 if len(self.e_names) > 1: 386 print(' Ensemble errors:') 387 for e_name in self.mc_names: 388 if len(self.e_names) > 1: 389 print('', e_name, '\t %3.8e +/- %3.8e' % (self.e_dvalue[e_name], self.e_ddvalue[e_name])) 390 if self.tau_exp[e_name] > 0: 391 print(' t_int\t %3.8e +/- %3.8e tau_exp = %3.2f, N_sigma = %1.0i' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.tau_exp[e_name], self.N_sigma[e_name])) 392 else: 393 print(' t_int\t %3.8e +/- %3.8e S = %3.2f' % (self.e_tauint[e_name], self.e_dtauint[e_name], self.S[e_name])) 394 for e_name in self.cov_names: 395 print('', e_name, '\t %3.8e' % (self.e_dvalue[e_name])) 396 if ens_content is True: 397 if len(self.e_names) == 1: 398 print(self.N, 'samples in', len(self.e_names), 'ensemble:') 399 else: 400 print(self.N, 'samples in', len(self.e_names), 'ensembles:') 401 my_string_list = [] 402 for key, value in sorted(self.e_content.items()): 403 if key not in self.covobs: 404 my_string = ' ' + "\u00B7 Ensemble '" + key + "' " 405 if len(value) == 1: 406 my_string += f': {self.shape[value[0]]} configurations' 407 if isinstance(self.idl[value[0]], range): 408 my_string += f' (from {self.idl[value[0]].start} to {self.idl[value[0]][-1]}' + int(self.idl[value[0]].step != 1) * f' in steps of {self.idl[value[0]].step}' + ')' 409 else: 410 my_string += ' (irregular range)' 411 else: 412 sublist = [] 413 for v in value: 414 my_substring = ' ' + "\u00B7 Replicum '" + v[len(key) + 1:] + "' " 415 my_substring += f': {self.shape[v]} configurations' 416 if isinstance(self.idl[v], range): 417 my_substring += f' (from {self.idl[v].start} to {self.idl[v][-1]}' + int(self.idl[v].step != 1) * f' in steps of {self.idl[v].step}' + ')' 418 else: 419 my_substring += ' (irregular range)' 420 sublist.append(my_substring) 421 422 my_string += '\n' + '\n'.join(sublist) 423 else: 424 my_string = ' ' + "\u00B7 Covobs '" + key + "' " 425 my_string_list.append(my_string) 426 print('\n'.join(my_string_list))
Output detailed properties of the Obs.
Parameters
- ens_content (bool): print details about the ensembles and replica if true.
428 def is_zero_within_error(self, sigma=1): 429 """Checks whether the observable is zero within 'sigma' standard errors. 430 431 Parameters 432 ---------- 433 sigma : int 434 Number of standard errors used for the check. 435 436 Works only properly when the gamma method was run. 437 """ 438 return self.is_zero() or np.abs(self.value) <= sigma * self._dvalue
Checks whether the observable is zero within 'sigma' standard errors.
Parameters
- sigma (int): Number of standard errors used for the check.
- Works only properly when the gamma method was run.
440 def is_zero(self, atol=1e-10): 441 """Checks whether the observable is zero within a given tolerance. 442 443 Parameters 444 ---------- 445 atol : float 446 Absolute tolerance (for details see numpy documentation). 447 """ 448 return np.isclose(0.0, self.value, 1e-14, atol) and all(np.allclose(0.0, delta, 1e-14, atol) for delta in self.deltas.values()) and all(np.allclose(0.0, delta.errsq(), 1e-14, atol) for delta in self.covobs.values())
Checks whether the observable is zero within a given tolerance.
Parameters
- atol (float): Absolute tolerance (for details see numpy documentation).
450 def plot_tauint(self, save=None): 451 """Plot integrated autocorrelation time for each ensemble. 452 453 Parameters 454 ---------- 455 save : str 456 saves the figure to a file named 'save' if. 457 """ 458 if not hasattr(self, 'e_dvalue'): 459 raise Exception('Run the gamma method first.') 460 461 for e, e_name in enumerate(self.mc_names): 462 fig = plt.figure() 463 plt.xlabel(r'$W$') 464 plt.ylabel(r'$\tau_\mathrm{int}$') 465 length = int(len(self.e_n_tauint[e_name])) 466 if self.tau_exp[e_name] > 0: 467 base = self.e_n_tauint[e_name][self.e_windowsize[e_name]] 468 x_help = np.arange(2 * self.tau_exp[e_name]) 469 y_help = (x_help + 1) * np.abs(self.e_rho[e_name][self.e_windowsize[e_name] + 1]) * (1 - x_help / (2 * (2 * self.tau_exp[e_name] - 1))) + base 470 x_arr = np.arange(self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]) 471 plt.plot(x_arr, y_help, 'C' + str(e), linewidth=1, ls='--', marker=',') 472 plt.errorbar([self.e_windowsize[e_name] + 2 * self.tau_exp[e_name]], [self.e_tauint[e_name]], 473 yerr=[self.e_dtauint[e_name]], fmt='C' + str(e), linewidth=1, capsize=2, marker='o', mfc=plt.rcParams['axes.facecolor']) 474 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 475 label = e_name + r', $\tau_\mathrm{exp}$=' + str(np.around(self.tau_exp[e_name], decimals=2)) 476 else: 477 label = e_name + ', S=' + str(np.around(self.S[e_name], decimals=2)) 478 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 479 480 plt.errorbar(np.arange(length)[:int(xmax) + 1], self.e_n_tauint[e_name][:int(xmax) + 1], yerr=self.e_n_dtauint[e_name][:int(xmax) + 1], linewidth=1, capsize=2, label=label) 481 plt.axvline(x=self.e_windowsize[e_name], color='C' + str(e), alpha=0.5, marker=',', ls='--') 482 plt.legend() 483 plt.xlim(-0.5, xmax) 484 ylim = plt.ylim() 485 plt.ylim(bottom=0.0, top=max(1.0, ylim[1])) 486 plt.draw() 487 if save: 488 fig.savefig(save + "_" + str(e))
Plot integrated autocorrelation time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
490 def plot_rho(self, save=None): 491 """Plot normalized autocorrelation function time for each ensemble. 492 493 Parameters 494 ---------- 495 save : str 496 saves the figure to a file named 'save' if. 497 """ 498 if not hasattr(self, 'e_dvalue'): 499 raise Exception('Run the gamma method first.') 500 for e, e_name in enumerate(self.mc_names): 501 fig = plt.figure() 502 plt.xlabel('W') 503 plt.ylabel('rho') 504 length = int(len(self.e_drho[e_name])) 505 plt.errorbar(np.arange(length), self.e_rho[e_name][:length], yerr=self.e_drho[e_name][:], linewidth=1, capsize=2) 506 plt.axvline(x=self.e_windowsize[e_name], color='r', alpha=0.25, ls='--', marker=',') 507 if self.tau_exp[e_name] > 0: 508 plt.plot([self.e_windowsize[e_name] + 1, self.e_windowsize[e_name] + 1 + 2 * self.tau_exp[e_name]], 509 [self.e_rho[e_name][self.e_windowsize[e_name] + 1], 0], 'k-', lw=1) 510 xmax = self.e_windowsize[e_name] + 2 * self.tau_exp[e_name] + 1.5 511 plt.title('Rho ' + e_name + r', tau\_exp=' + str(np.around(self.tau_exp[e_name], decimals=2))) 512 else: 513 xmax = max(10.5, 2 * self.e_windowsize[e_name] - 0.5) 514 plt.title('Rho ' + e_name + ', S=' + str(np.around(self.S[e_name], decimals=2))) 515 plt.plot([-0.5, xmax], [0, 0], 'k--', lw=1) 516 plt.xlim(-0.5, xmax) 517 plt.draw() 518 if save: 519 fig.savefig(save + "_" + str(e))
Plot normalized autocorrelation function time for each ensemble.
Parameters
- save (str): saves the figure to a file named 'save' if.
521 def plot_rep_dist(self): 522 """Plot replica distribution for each ensemble with more than one replicum.""" 523 if not hasattr(self, 'e_dvalue'): 524 raise Exception('Run the gamma method first.') 525 for e, e_name in enumerate(self.mc_names): 526 if len(self.e_content[e_name]) == 1: 527 print('No replica distribution for a single replicum (', e_name, ')') 528 continue 529 r_length = [] 530 sub_r_mean = 0 531 for r, r_name in enumerate(self.e_content[e_name]): 532 r_length.append(len(self.deltas[r_name])) 533 sub_r_mean += self.shape[r_name] * self.r_values[r_name] 534 e_N = np.sum(r_length) 535 sub_r_mean /= e_N 536 arr = np.zeros(len(self.e_content[e_name])) 537 for r, r_name in enumerate(self.e_content[e_name]): 538 arr[r] = (self.r_values[r_name] - sub_r_mean) / (self.e_dvalue[e_name] * np.sqrt(e_N / self.shape[r_name] - 1)) 539 plt.hist(arr, rwidth=0.8, bins=len(self.e_content[e_name])) 540 plt.title('Replica distribution' + e_name + ' (mean=0, var=1)') 541 plt.draw()
Plot replica distribution for each ensemble with more than one replicum.
543 def plot_history(self, expand=True): 544 """Plot derived Monte Carlo history for each ensemble 545 546 Parameters 547 ---------- 548 expand : bool 549 show expanded history for irregular Monte Carlo chains (default: True). 550 """ 551 for e, e_name in enumerate(self.mc_names): 552 plt.figure() 553 r_length = [] 554 tmp = [] 555 tmp_expanded = [] 556 for r, r_name in enumerate(self.e_content[e_name]): 557 tmp.append(self.deltas[r_name] + self.r_values[r_name]) 558 if expand: 559 tmp_expanded.append(_expand_deltas(self.deltas[r_name], list(self.idl[r_name]), self.shape[r_name]) + self.r_values[r_name]) 560 r_length.append(len(tmp_expanded[-1])) 561 else: 562 r_length.append(len(tmp[-1])) 563 e_N = np.sum(r_length) 564 x = np.arange(e_N) 565 y_test = np.concatenate(tmp, axis=0) 566 if expand: 567 y = np.concatenate(tmp_expanded, axis=0) 568 else: 569 y = y_test 570 plt.errorbar(x, y, fmt='.', markersize=3) 571 plt.xlim(-0.5, e_N - 0.5) 572 plt.title(e_name + f'\nskew: {skew(y_test):.3f} (p={skewtest(y_test).pvalue:.3f}), kurtosis: {kurtosis(y_test):.3f} (p={kurtosistest(y_test).pvalue:.3f})') 573 plt.draw()
Plot derived Monte Carlo history for each ensemble
Parameters
- expand (bool): show expanded history for irregular Monte Carlo chains (default: True).
575 def plot_piechart(self, save=None): 576 """Plot piechart which shows the fractional contribution of each 577 ensemble to the error and returns a dictionary containing the fractions. 578 579 Parameters 580 ---------- 581 save : str 582 saves the figure to a file named 'save' if. 583 """ 584 if not hasattr(self, 'e_dvalue'): 585 raise Exception('Run the gamma method first.') 586 if np.isclose(0.0, self._dvalue, atol=1e-15): 587 raise Exception('Error is 0.0') 588 labels = self.e_names 589 sizes = [self.e_dvalue[name] ** 2 for name in labels] / self._dvalue ** 2 590 fig1, ax1 = plt.subplots() 591 ax1.pie(sizes, labels=labels, startangle=90, normalize=True) 592 ax1.axis('equal') 593 plt.draw() 594 if save: 595 fig1.savefig(save) 596 597 return dict(zip(self.e_names, sizes))
Plot piechart which shows the fractional contribution of each ensemble to the error and returns a dictionary containing the fractions.
Parameters
- save (str): saves the figure to a file named 'save' if.
599 def dump(self, filename, datatype="json.gz", description="", **kwargs): 600 """Dump the Obs to a file 'name' of chosen format. 601 602 Parameters 603 ---------- 604 filename : str 605 name of the file to be saved. 606 datatype : str 607 Format of the exported file. Supported formats include 608 "json.gz" and "pickle" 609 description : str 610 Description for output file, only relevant for json.gz format. 611 path : str 612 specifies a custom path for the file (default '.') 613 """ 614 if 'path' in kwargs: 615 file_name = kwargs.get('path') + '/' + filename 616 else: 617 file_name = filename 618 619 if datatype == "json.gz": 620 from .input.json import dump_to_json 621 dump_to_json([self], file_name, description=description) 622 elif datatype == "pickle": 623 with open(file_name + '.p', 'wb') as fb: 624 pickle.dump(self, fb) 625 else: 626 raise Exception("Unknown datatype " + str(datatype))
Dump the Obs to a file 'name' of chosen format.
Parameters
- filename (str): name of the file to be saved.
- datatype (str): Format of the exported file. Supported formats include "json.gz" and "pickle"
- description (str): Description for output file, only relevant for json.gz format.
- path (str): specifies a custom path for the file (default '.')
628 def export_jackknife(self): 629 """Export jackknife samples from the Obs 630 631 Returns 632 ------- 633 numpy.ndarray 634 Returns a numpy array of length N + 1 where N is the number of samples 635 for the given ensemble and replicum. The zeroth entry of the array contains 636 the mean value of the Obs, entries 1 to N contain the N jackknife samples 637 derived from the Obs. The current implementation only works for observables 638 defined on exactly one ensemble and replicum. The derived jackknife samples 639 should agree with samples from a full jackknife analysis up to O(1/N). 640 """ 641 642 if len(self.names) != 1: 643 raise Exception("'export_jackknife' is only implemented for Obs defined on one ensemble and replicum.") 644 645 name = self.names[0] 646 full_data = self.deltas[name] + self.r_values[name] 647 n = full_data.size 648 mean = self.value 649 tmp_jacks = np.zeros(n + 1) 650 tmp_jacks[0] = mean 651 tmp_jacks[1:] = (n * mean - full_data) / (n - 1) 652 return tmp_jacks
Export jackknife samples from the Obs
Returns
- numpy.ndarray: Returns a numpy array of length N + 1 where N is the number of samples for the given ensemble and replicum. The zeroth entry of the array contains the mean value of the Obs, entries 1 to N contain the N jackknife samples derived from the Obs. The current implementation only works for observables defined on exactly one ensemble and replicum. The derived jackknife samples should agree with samples from a full jackknife analysis up to O(1/N).
825class CObs: 826 """Class for a complex valued observable.""" 827 __slots__ = ['_real', '_imag', 'tag'] 828 829 def __init__(self, real, imag=0.0): 830 self._real = real 831 self._imag = imag 832 self.tag = None 833 834 @property 835 def real(self): 836 return self._real 837 838 @property 839 def imag(self): 840 return self._imag 841 842 def gamma_method(self, **kwargs): 843 """Executes the gamma_method for the real and the imaginary part.""" 844 if isinstance(self.real, Obs): 845 self.real.gamma_method(**kwargs) 846 if isinstance(self.imag, Obs): 847 self.imag.gamma_method(**kwargs) 848 849 def is_zero(self): 850 """Checks whether both real and imaginary part are zero within machine precision.""" 851 return self.real == 0.0 and self.imag == 0.0 852 853 def conjugate(self): 854 return CObs(self.real, -self.imag) 855 856 def __add__(self, other): 857 if isinstance(other, np.ndarray): 858 return other + self 859 elif hasattr(other, 'real') and hasattr(other, 'imag'): 860 return CObs(self.real + other.real, 861 self.imag + other.imag) 862 else: 863 return CObs(self.real + other, self.imag) 864 865 def __radd__(self, y): 866 return self + y 867 868 def __sub__(self, other): 869 if isinstance(other, np.ndarray): 870 return -1 * (other - self) 871 elif hasattr(other, 'real') and hasattr(other, 'imag'): 872 return CObs(self.real - other.real, self.imag - other.imag) 873 else: 874 return CObs(self.real - other, self.imag) 875 876 def __rsub__(self, other): 877 return -1 * (self - other) 878 879 def __mul__(self, other): 880 if isinstance(other, np.ndarray): 881 return other * self 882 elif hasattr(other, 'real') and hasattr(other, 'imag'): 883 if all(isinstance(i, Obs) for i in [self.real, self.imag, other.real, other.imag]): 884 return CObs(derived_observable(lambda x, **kwargs: x[0] * x[1] - x[2] * x[3], 885 [self.real, other.real, self.imag, other.imag], 886 man_grad=[other.real.value, self.real.value, -other.imag.value, -self.imag.value]), 887 derived_observable(lambda x, **kwargs: x[2] * x[1] + x[0] * x[3], 888 [self.real, other.real, self.imag, other.imag], 889 man_grad=[other.imag.value, self.imag.value, other.real.value, self.real.value])) 890 elif getattr(other, 'imag', 0) != 0: 891 return CObs(self.real * other.real - self.imag * other.imag, 892 self.imag * other.real + self.real * other.imag) 893 else: 894 return CObs(self.real * other.real, self.imag * other.real) 895 else: 896 return CObs(self.real * other, self.imag * other) 897 898 def __rmul__(self, other): 899 return self * other 900 901 def __truediv__(self, other): 902 if isinstance(other, np.ndarray): 903 return 1 / (other / self) 904 elif hasattr(other, 'real') and hasattr(other, 'imag'): 905 r = other.real ** 2 + other.imag ** 2 906 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.imag * other.real - self.real * other.imag) / r) 907 else: 908 return CObs(self.real / other, self.imag / other) 909 910 def __rtruediv__(self, other): 911 r = self.real ** 2 + self.imag ** 2 912 if hasattr(other, 'real') and hasattr(other, 'imag'): 913 return CObs((self.real * other.real + self.imag * other.imag) / r, (self.real * other.imag - self.imag * other.real) / r) 914 else: 915 return CObs(self.real * other / r, -self.imag * other / r) 916 917 def __abs__(self): 918 return np.sqrt(self.real**2 + self.imag**2) 919 920 def __pos__(self): 921 return self 922 923 def __neg__(self): 924 return -1 * self 925 926 def __eq__(self, other): 927 return self.real == other.real and self.imag == other.imag 928 929 def __str__(self): 930 return '(' + str(self.real) + int(self.imag >= 0.0) * '+' + str(self.imag) + 'j)' 931 932 def __repr__(self): 933 return 'CObs[' + str(self) + ']'
Class for a complex valued observable.
842 def gamma_method(self, **kwargs): 843 """Executes the gamma_method for the real and the imaginary part.""" 844 if isinstance(self.real, Obs): 845 self.real.gamma_method(**kwargs) 846 if isinstance(self.imag, Obs): 847 self.imag.gamma_method(**kwargs)
Executes the gamma_method for the real and the imaginary part.
1041def derived_observable(func, data, array_mode=False, **kwargs): 1042 """Construct a derived Obs according to func(data, **kwargs) using automatic differentiation. 1043 1044 Parameters 1045 ---------- 1046 func : object 1047 arbitrary function of the form func(data, **kwargs). For the 1048 automatic differentiation to work, all numpy functions have to have 1049 the autograd wrapper (use 'import autograd.numpy as anp'). 1050 data : list 1051 list of Obs, e.g. [obs1, obs2, obs3]. 1052 num_grad : bool 1053 if True, numerical derivatives are used instead of autograd 1054 (default False). To control the numerical differentiation the 1055 kwargs of numdifftools.step_generators.MaxStepGenerator 1056 can be used. 1057 man_grad : list 1058 manually supply a list or an array which contains the jacobian 1059 of func. Use cautiously, supplying the wrong derivative will 1060 not be intercepted. 1061 1062 Notes 1063 ----- 1064 For simple mathematical operations it can be practical to use anonymous 1065 functions. For the ratio of two observables one can e.g. use 1066 1067 new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2]) 1068 """ 1069 1070 data = np.asarray(data) 1071 raveled_data = data.ravel() 1072 1073 # Workaround for matrix operations containing non Obs data 1074 if not all(isinstance(x, Obs) for x in raveled_data): 1075 for i in range(len(raveled_data)): 1076 if isinstance(raveled_data[i], (int, float)): 1077 raveled_data[i] = cov_Obs(raveled_data[i], 0.0, "###dummy_covobs###") 1078 1079 allcov = {} 1080 for o in raveled_data: 1081 for name in o.cov_names: 1082 if name in allcov: 1083 if not np.allclose(allcov[name], o.covobs[name].cov): 1084 raise Exception('Inconsistent covariance matrices for %s!' % (name)) 1085 else: 1086 allcov[name] = o.covobs[name].cov 1087 1088 n_obs = len(raveled_data) 1089 new_names = sorted(set([y for x in [o.names for o in raveled_data] for y in x])) 1090 new_cov_names = sorted(set([y for x in [o.cov_names for o in raveled_data] for y in x])) 1091 new_sample_names = sorted(set(new_names) - set(new_cov_names)) 1092 1093 is_merged = {name: (len(list(filter(lambda o: o.is_merged.get(name, False) is True, raveled_data))) > 0) for name in new_sample_names} 1094 reweighted = len(list(filter(lambda o: o.reweighted is True, raveled_data))) > 0 1095 1096 if data.ndim == 1: 1097 values = np.array([o.value for o in data]) 1098 else: 1099 values = np.vectorize(lambda x: x.value)(data) 1100 1101 new_values = func(values, **kwargs) 1102 1103 multi = int(isinstance(new_values, np.ndarray)) 1104 1105 new_r_values = {} 1106 new_idl_d = {} 1107 for name in new_sample_names: 1108 idl = [] 1109 tmp_values = np.zeros(n_obs) 1110 for i, item in enumerate(raveled_data): 1111 tmp_values[i] = item.r_values.get(name, item.value) 1112 tmp_idl = item.idl.get(name) 1113 if tmp_idl is not None: 1114 idl.append(tmp_idl) 1115 if multi > 0: 1116 tmp_values = np.array(tmp_values).reshape(data.shape) 1117 new_r_values[name] = func(tmp_values, **kwargs) 1118 new_idl_d[name] = _merge_idx(idl) 1119 if not is_merged[name]: 1120 is_merged[name] = (1 != len(set([len(idx) for idx in [*idl, new_idl_d[name]]]))) 1121 1122 if 'man_grad' in kwargs: 1123 deriv = np.asarray(kwargs.get('man_grad')) 1124 if new_values.shape + data.shape != deriv.shape: 1125 raise Exception('Manual derivative does not have correct shape.') 1126 elif kwargs.get('num_grad') is True: 1127 if multi > 0: 1128 raise Exception('Multi mode currently not supported for numerical derivative') 1129 options = { 1130 'base_step': 0.1, 1131 'step_ratio': 2.5} 1132 for key in options.keys(): 1133 kwarg = kwargs.get(key) 1134 if kwarg is not None: 1135 options[key] = kwarg 1136 tmp_df = nd.Gradient(func, order=4, **{k: v for k, v in options.items() if v is not None})(values, **kwargs) 1137 if tmp_df.size == 1: 1138 deriv = np.array([tmp_df.real]) 1139 else: 1140 deriv = tmp_df.real 1141 else: 1142 deriv = jacobian(func)(values, **kwargs) 1143 1144 final_result = np.zeros(new_values.shape, dtype=object) 1145 1146 if array_mode is True: 1147 1148 class _Zero_grad(): 1149 def __init__(self, N): 1150 self.grad = np.zeros((N, 1)) 1151 1152 new_covobs_lengths = dict(set([y for x in [[(n, o.covobs[n].N) for n in o.cov_names] for o in raveled_data] for y in x])) 1153 d_extracted = {} 1154 g_extracted = {} 1155 for name in new_sample_names: 1156 d_extracted[name] = [] 1157 ens_length = len(new_idl_d[name]) 1158 for i_dat, dat in enumerate(data): 1159 d_extracted[name].append(np.array([_expand_deltas_for_merge(o.deltas.get(name, np.zeros(ens_length)), o.idl.get(name, new_idl_d[name]), o.shape.get(name, ens_length), new_idl_d[name]) for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (ens_length, ))) 1160 for name in new_cov_names: 1161 g_extracted[name] = [] 1162 zero_grad = _Zero_grad(new_covobs_lengths[name]) 1163 for i_dat, dat in enumerate(data): 1164 g_extracted[name].append(np.array([o.covobs.get(name, zero_grad).grad for o in dat.reshape(np.prod(dat.shape))]).reshape(dat.shape + (new_covobs_lengths[name], 1))) 1165 1166 for i_val, new_val in np.ndenumerate(new_values): 1167 new_deltas = {} 1168 new_grad = {} 1169 if array_mode is True: 1170 for name in new_sample_names: 1171 ens_length = d_extracted[name][0].shape[-1] 1172 new_deltas[name] = np.zeros(ens_length) 1173 for i_dat, dat in enumerate(d_extracted[name]): 1174 new_deltas[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1175 for name in new_cov_names: 1176 new_grad[name] = 0 1177 for i_dat, dat in enumerate(g_extracted[name]): 1178 new_grad[name] += np.tensordot(deriv[i_val + (i_dat, )], dat) 1179 else: 1180 for j_obs, obs in np.ndenumerate(data): 1181 for name in obs.names: 1182 if name in obs.cov_names: 1183 new_grad[name] = new_grad.get(name, 0) + deriv[i_val + j_obs] * obs.covobs[name].grad 1184 else: 1185 new_deltas[name] = new_deltas.get(name, 0) + deriv[i_val + j_obs] * _expand_deltas_for_merge(obs.deltas[name], obs.idl[name], obs.shape[name], new_idl_d[name]) 1186 1187 new_covobs = {name: Covobs(0, allcov[name], name, grad=new_grad[name]) for name in new_grad} 1188 1189 if not set(new_covobs.keys()).isdisjoint(new_deltas.keys()): 1190 raise Exception('The same name has been used for deltas and covobs!') 1191 new_samples = [] 1192 new_means = [] 1193 new_idl = [] 1194 new_names_obs = [] 1195 for name in new_names: 1196 if name not in new_covobs: 1197 if is_merged[name]: 1198 filtered_deltas, filtered_idl_d = _filter_zeroes(new_deltas[name], new_idl_d[name]) 1199 else: 1200 filtered_deltas = new_deltas[name] 1201 filtered_idl_d = new_idl_d[name] 1202 1203 new_samples.append(filtered_deltas) 1204 new_idl.append(filtered_idl_d) 1205 new_means.append(new_r_values[name][i_val]) 1206 new_names_obs.append(name) 1207 final_result[i_val] = Obs(new_samples, new_names_obs, means=new_means, idl=new_idl) 1208 for name in new_covobs: 1209 final_result[i_val].names.append(name) 1210 final_result[i_val]._covobs = new_covobs 1211 final_result[i_val]._value = new_val 1212 final_result[i_val].is_merged = is_merged 1213 final_result[i_val].reweighted = reweighted 1214 1215 if multi == 0: 1216 final_result = final_result.item() 1217 1218 return final_result
Construct a derived Obs according to func(data, **kwargs) using automatic differentiation.
Parameters
- func (object): arbitrary function of the form func(data, **kwargs). For the automatic differentiation to work, all numpy functions have to have the autograd wrapper (use 'import autograd.numpy as anp').
- data (list): list of Obs, e.g. [obs1, obs2, obs3].
- num_grad (bool): if True, numerical derivatives are used instead of autograd (default False). To control the numerical differentiation the kwargs of numdifftools.step_generators.MaxStepGenerator can be used.
- man_grad (list): manually supply a list or an array which contains the jacobian of func. Use cautiously, supplying the wrong derivative will not be intercepted.
Notes
For simple mathematical operations it can be practical to use anonymous functions. For the ratio of two observables one can e.g. use
new_obs = derived_observable(lambda x: x[0] / x[1], [obs1, obs2])
1258def reweight(weight, obs, **kwargs): 1259 """Reweight a list of observables. 1260 1261 Parameters 1262 ---------- 1263 weight : Obs 1264 Reweighting factor. An Observable that has to be defined on a superset of the 1265 configurations in obs[i].idl for all i. 1266 obs : list 1267 list of Obs, e.g. [obs1, obs2, obs3]. 1268 all_configs : bool 1269 if True, the reweighted observables are normalized by the average of 1270 the reweighting factor on all configurations in weight.idl and not 1271 on the configurations in obs[i].idl. 1272 """ 1273 result = [] 1274 for i in range(len(obs)): 1275 if len(obs[i].cov_names): 1276 raise Exception('Error: Not possible to reweight an Obs that contains covobs!') 1277 if not set(obs[i].names).issubset(weight.names): 1278 raise Exception('Error: Ensembles do not fit') 1279 for name in obs[i].names: 1280 if not set(obs[i].idl[name]).issubset(weight.idl[name]): 1281 raise Exception('obs[%d] has to be defined on a subset of the configs in weight.idl[%s]!' % (i, name)) 1282 new_samples = [] 1283 w_deltas = {} 1284 for name in sorted(obs[i].names): 1285 w_deltas[name] = _reduce_deltas(weight.deltas[name], weight.idl[name], obs[i].idl[name]) 1286 new_samples.append((w_deltas[name] + weight.r_values[name]) * (obs[i].deltas[name] + obs[i].r_values[name])) 1287 tmp_obs = Obs(new_samples, sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1288 1289 if kwargs.get('all_configs'): 1290 new_weight = weight 1291 else: 1292 new_weight = Obs([w_deltas[name] + weight.r_values[name] for name in sorted(obs[i].names)], sorted(obs[i].names), idl=[obs[i].idl[name] for name in sorted(obs[i].names)]) 1293 1294 result.append(derived_observable(lambda x, **kwargs: x[0] / x[1], [tmp_obs, new_weight], **kwargs)) 1295 result[-1].reweighted = True 1296 result[-1].is_merged = obs[i].is_merged 1297 1298 return result
Reweight a list of observables.
Parameters
- weight (Obs): Reweighting factor. An Observable that has to be defined on a superset of the configurations in obs[i].idl for all i.
- obs (list): list of Obs, e.g. [obs1, obs2, obs3].
- all_configs (bool): if True, the reweighted observables are normalized by the average of the reweighting factor on all configurations in weight.idl and not on the configurations in obs[i].idl.
1301def correlate(obs_a, obs_b): 1302 """Correlate two observables. 1303 1304 Parameters 1305 ---------- 1306 obs_a : Obs 1307 First observable 1308 obs_b : Obs 1309 Second observable 1310 1311 Notes 1312 ----- 1313 Keep in mind to only correlate primary observables which have not been reweighted 1314 yet. The reweighting has to be applied after correlating the observables. 1315 Currently only works if ensembles are identical (this is not strictly necessary). 1316 """ 1317 1318 if sorted(obs_a.names) != sorted(obs_b.names): 1319 raise Exception('Ensembles do not fit') 1320 if len(obs_a.cov_names) or len(obs_b.cov_names): 1321 raise Exception('Error: Not possible to correlate Obs that contain covobs!') 1322 for name in obs_a.names: 1323 if obs_a.shape[name] != obs_b.shape[name]: 1324 raise Exception('Shapes of ensemble', name, 'do not fit') 1325 if obs_a.idl[name] != obs_b.idl[name]: 1326 raise Exception('idl of ensemble', name, 'do not fit') 1327 1328 if obs_a.reweighted is True: 1329 warnings.warn("The first observable is already reweighted.", RuntimeWarning) 1330 if obs_b.reweighted is True: 1331 warnings.warn("The second observable is already reweighted.", RuntimeWarning) 1332 1333 new_samples = [] 1334 new_idl = [] 1335 for name in sorted(obs_a.names): 1336 new_samples.append((obs_a.deltas[name] + obs_a.r_values[name]) * (obs_b.deltas[name] + obs_b.r_values[name])) 1337 new_idl.append(obs_a.idl[name]) 1338 1339 o = Obs(new_samples, sorted(obs_a.names), idl=new_idl) 1340 o.is_merged = {name: (obs_a.is_merged.get(name, False) or obs_b.is_merged.get(name, False)) for name in o.names} 1341 o.reweighted = obs_a.reweighted or obs_b.reweighted 1342 return o
Correlate two observables.
Parameters
- obs_a (Obs): First observable
- obs_b (Obs): Second observable
Notes
Keep in mind to only correlate primary observables which have not been reweighted yet. The reweighting has to be applied after correlating the observables. Currently only works if ensembles are identical (this is not strictly necessary).
1345def covariance(obs, visualize=False, correlation=False, smooth=None, **kwargs): 1346 r'''Calculates the error covariance matrix of a set of observables. 1347 1348 The gamma method has to be applied first to all observables. 1349 1350 Parameters 1351 ---------- 1352 obs : list or numpy.ndarray 1353 List or one dimensional array of Obs 1354 visualize : bool 1355 If True plots the corresponding normalized correlation matrix (default False). 1356 correlation : bool 1357 If True the correlation matrix instead of the error covariance matrix is returned (default False). 1358 smooth : None or int 1359 If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue 1360 smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the 1361 largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely 1362 small ones. 1363 1364 Notes 1365 ----- 1366 The error covariance is defined such that it agrees with the squared standard error for two identical observables 1367 $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ 1368 in the absence of autocorrelation. 1369 The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite 1370 $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. 1371 For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. 1372 $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ 1373 This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors). 1374 ''' 1375 1376 length = len(obs) 1377 1378 max_samples = np.max([o.N for o in obs]) 1379 if max_samples <= length and not [item for sublist in [o.cov_names for o in obs] for item in sublist]: 1380 warnings.warn(f"The dimension of the covariance matrix ({length}) is larger or equal to the number of samples ({max_samples}). This will result in a rank deficient matrix.", RuntimeWarning) 1381 1382 cov = np.zeros((length, length)) 1383 for i in range(length): 1384 for j in range(i, length): 1385 cov[i, j] = _covariance_element(obs[i], obs[j]) 1386 cov = cov + cov.T - np.diag(np.diag(cov)) 1387 1388 corr = np.diag(1 / np.sqrt(np.diag(cov))) @ cov @ np.diag(1 / np.sqrt(np.diag(cov))) 1389 1390 if isinstance(smooth, int): 1391 corr = _smooth_eigenvalues(corr, smooth) 1392 1393 errors = [o.dvalue for o in obs] 1394 cov = np.diag(errors) @ corr @ np.diag(errors) 1395 1396 eigenvalues = np.linalg.eigh(cov)[0] 1397 if not np.all(eigenvalues >= 0): 1398 warnings.warn("Covariance matrix is not positive semi-definite (Eigenvalues: " + str(eigenvalues) + ")", RuntimeWarning) 1399 1400 if visualize: 1401 plt.matshow(corr, vmin=-1, vmax=1) 1402 plt.set_cmap('RdBu') 1403 plt.colorbar() 1404 plt.draw() 1405 1406 if correlation is True: 1407 return corr 1408 else: 1409 return cov
Calculates the error covariance matrix of a set of observables.
The gamma method has to be applied first to all observables.
Parameters
- obs (list or numpy.ndarray): List or one dimensional array of Obs
- visualize (bool): If True plots the corresponding normalized correlation matrix (default False).
- correlation (bool): If True the correlation matrix instead of the error covariance matrix is returned (default False).
- smooth (None or int): If smooth is an integer 'E' between 2 and the dimension of the matrix minus 1 the eigenvalue smoothing procedure of hep-lat/9412087 is applied to the correlation matrix which leaves the largest E eigenvalues essentially unchanged and smoothes the smaller eigenvalues to avoid extremely small ones.
Notes
The error covariance is defined such that it agrees with the squared standard error for two identical observables $$\operatorname{cov}(a,a)=\sum_{s=1}^N\delta_a^s\delta_a^s/N^2=\Gamma_{aa}(0)/N=\operatorname{var}(a)/N=\sigma_a^2$$ in the absence of autocorrelation. The error covariance is estimated by calculating the correlation matrix assuming no autocorrelation and then rescaling the correlation matrix by the full errors including the previous gamma method estimate for the autocorrelation of the observables. The covariance at windowsize 0 is guaranteed to be positive semi-definite $$\sum_{i,j}v_i\Gamma_{ij}(0)v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i,j}v_i\delta_i^s\delta_j^s v_j=\frac{1}{N}\sum_{s=1}^N\sum_{i}|v_i\delta_i^s|^2\geq 0\,,$$ for every $v\in\mathbb{R}^M$, while such an identity does not hold for larger windows/lags. For observables defined on a single ensemble our approximation is equivalent to assuming that the integrated autocorrelation time of an off-diagonal element is equal to the geometric mean of the integrated autocorrelation times of the corresponding diagonal elements. $$\tau_{\mathrm{int}, ij}=\sqrt{\tau_{\mathrm{int}, i}\times \tau_{\mathrm{int}, j}}$$ This construction ensures that the estimated covariance matrix is positive semi-definite (up to numerical rounding errors).
1488def import_jackknife(jacks, name, idl=None): 1489 """Imports jackknife samples and returns an Obs 1490 1491 Parameters 1492 ---------- 1493 jacks : numpy.ndarray 1494 numpy array containing the mean value as zeroth entry and 1495 the N jackknife samples as first to Nth entry. 1496 name : str 1497 name of the ensemble the samples are defined on. 1498 """ 1499 length = len(jacks) - 1 1500 prj = (np.ones((length, length)) - (length - 1) * np.identity(length)) 1501 samples = jacks[1:] @ prj 1502 mean = np.mean(samples) 1503 new_obs = Obs([samples - mean], [name], idl=idl, means=[mean]) 1504 new_obs._value = jacks[0] 1505 return new_obs
Imports jackknife samples and returns an Obs
Parameters
- jacks (numpy.ndarray): numpy array containing the mean value as zeroth entry and the N jackknife samples as first to Nth entry.
- name (str): name of the ensemble the samples are defined on.
1508def merge_obs(list_of_obs): 1509 """Combine all observables in list_of_obs into one new observable 1510 1511 Parameters 1512 ---------- 1513 list_of_obs : list 1514 list of the Obs object to be combined 1515 1516 Notes 1517 ----- 1518 It is not possible to combine obs which are based on the same replicum 1519 """ 1520 replist = [item for obs in list_of_obs for item in obs.names] 1521 if (len(replist) == len(set(replist))) is False: 1522 raise Exception('list_of_obs contains duplicate replica: %s' % (str(replist))) 1523 if any([len(o.cov_names) for o in list_of_obs]): 1524 raise Exception('Not possible to merge data that contains covobs!') 1525 new_dict = {} 1526 idl_dict = {} 1527 for o in list_of_obs: 1528 new_dict.update({key: o.deltas.get(key, 0) + o.r_values.get(key, 0) 1529 for key in set(o.deltas) | set(o.r_values)}) 1530 idl_dict.update({key: o.idl.get(key, 0) for key in set(o.deltas)}) 1531 1532 names = sorted(new_dict.keys()) 1533 o = Obs([new_dict[name] for name in names], names, idl=[idl_dict[name] for name in names]) 1534 o.is_merged = {name: np.any([oi.is_merged.get(name, False) for oi in list_of_obs]) for name in o.names} 1535 o.reweighted = np.max([oi.reweighted for oi in list_of_obs]) 1536 return o
Combine all observables in list_of_obs into one new observable
Parameters
- list_of_obs (list): list of the Obs object to be combined
Notes
It is not possible to combine obs which are based on the same replicum
1539def cov_Obs(means, cov, name, grad=None): 1540 """Create an Obs based on mean(s) and a covariance matrix 1541 1542 Parameters 1543 ---------- 1544 mean : list of floats or float 1545 N mean value(s) of the new Obs 1546 cov : list or array 1547 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance 1548 name : str 1549 identifier for the covariance matrix 1550 grad : list or array 1551 Gradient of the Covobs wrt. the means belonging to cov. 1552 """ 1553 1554 def covobs_to_obs(co): 1555 """Make an Obs out of a Covobs 1556 1557 Parameters 1558 ---------- 1559 co : Covobs 1560 Covobs to be embedded into the Obs 1561 """ 1562 o = Obs([], [], means=[]) 1563 o._value = co.value 1564 o.names.append(co.name) 1565 o._covobs[co.name] = co 1566 o._dvalue = np.sqrt(co.errsq()) 1567 return o 1568 1569 ol = [] 1570 if isinstance(means, (float, int)): 1571 means = [means] 1572 1573 for i in range(len(means)): 1574 ol.append(covobs_to_obs(Covobs(means[i], cov, name, pos=i, grad=grad))) 1575 if ol[0].covobs[name].N != len(means): 1576 raise Exception('You have to provide %d mean values!' % (ol[0].N)) 1577 if len(ol) == 1: 1578 return ol[0] 1579 return ol
Create an Obs based on mean(s) and a covariance matrix
Parameters
- mean (list of floats or float): N mean value(s) of the new Obs
- cov (list or array): 2d (NxN) Covariance matrix, 1d diagonal entries or 0d covariance
- name (str): identifier for the covariance matrix
- grad (list or array): Gradient of the Covobs wrt. the means belonging to cov.